From f510359793bd24d5427ef67ccaa5ab0caf6a95f2 Mon Sep 17 00:00:00 2001 From: Sebastian Date: Thu, 1 Aug 2024 11:47:24 +0200 Subject: [PATCH 01/50] Breaking change: Make Rows and Row API more consistent. A few notes: I went the path of least resistance also assuming it would break fewer folks, i.e. make Row more like Rows and thus usize -> i32. Arguably, an unsigned type might be more appropriate both for length and indexes. I understand that the i32 stems from the sqlite bindings, which returns/accepts c_ints. Yet, Statement and Row made the jump to an unsigned, probably drawing the same conclusion, whereas Rows preserved its proximity to the c-bindings. This is probably an artifact? Also going on a limb, mapping c_int -> i32 is already a non-portable choice, with precision for c_int being platform dependent. --- libsql/src/de.rs | 2 +- libsql/src/hrana/mod.rs | 29 ++++++++++++++++++---------- libsql/src/local/impls.rs | 18 ++++++++++------- libsql/src/local/rows.rs | 18 ++++++++++------- libsql/src/local/statement.rs | 21 ++++++++++---------- libsql/src/replication/connection.rs | 18 ++++++++++------- libsql/src/rows.rs | 21 +++++++++----------- 7 files changed, 72 insertions(+), 55 deletions(-) diff --git a/libsql/src/de.rs b/libsql/src/de.rs index 63ee71f598..44f231c134 100644 --- a/libsql/src/de.rs +++ b/libsql/src/de.rs @@ -68,7 +68,7 @@ impl<'de> Deserializer<'de> for RowDeserializer<'de> { visitor.visit_map(RowMapAccess { row: self.row, - idx: 0..self.row.inner.column_count(), + idx: 0..(self.row.inner.column_count() as usize), value: None, }) } diff --git a/libsql/src/hrana/mod.rs b/libsql/src/hrana/mod.rs index 9befe549de..4a6fd0c63a 100644 --- a/libsql/src/hrana/mod.rs +++ b/libsql/src/hrana/mod.rs @@ -24,7 +24,7 @@ use std::pin::Pin; use std::sync::Arc; use std::task::{Context, Poll}; -use super::rows::{RowInner, RowsInner}; +use super::rows::{ColumnsInner, RowInner, RowsInner}; pub(crate) type Result = std::result::Result; @@ -261,7 +261,12 @@ where 
async fn next(&mut self) -> crate::Result> { self.next().await } +} +impl ColumnsInner for HranaRows +where + S: Stream> + Send + Sync + Unpin, +{ fn column_count(&self) -> i32 { self.column_count() } @@ -303,13 +308,6 @@ impl RowInner for Row { Ok(into_value2(v)) } - fn column_name(&self, idx: i32) -> Option<&str> { - self.cols - .get(idx as usize) - .and_then(|c| c.name.as_ref()) - .map(|s| s.as_str()) - } - fn column_str(&self, idx: i32) -> crate::Result<&str> { if let Some(value) = self.inner.get(idx as usize) { if let proto::Value::Text { value } = value { @@ -321,6 +319,15 @@ impl RowInner for Row { Err(crate::Error::ColumnNotFound(idx)) } } +} + +impl ColumnsInner for Row { + fn column_name(&self, idx: i32) -> Option<&str> { + self.cols + .get(idx as usize) + .and_then(|c| c.name.as_ref()) + .map(|s| s.as_str()) + } fn column_type(&self, idx: i32) -> crate::Result { if let Some(value) = self.inner.get(idx as usize) { @@ -337,8 +344,8 @@ impl RowInner for Row { } } - fn column_count(&self) -> usize { - self.cols.len() + fn column_count(&self) -> i32 { + self.cols.len() as i32 } } @@ -417,7 +424,9 @@ impl RowsInner for StmtResultRows { inner: Box::new(row), })) } +} +impl ColumnsInner for StmtResultRows { fn column_count(&self) -> i32 { self.cols.len() as i32 } diff --git a/libsql/src/local/impls.rs b/libsql/src/local/impls.rs index 8a9a5f440e..2338317a34 100644 --- a/libsql/src/local/impls.rs +++ b/libsql/src/local/impls.rs @@ -5,7 +5,7 @@ use crate::connection::BatchRows; use crate::{ connection::Conn, params::Params, - rows::{RowInner, RowsInner}, + rows::{ColumnsInner, RowInner, RowsInner}, statement::Stmt, transaction::Tx, Column, Connection, Result, Row, Rows, Statement, Transaction, TransactionBehavior, Value, @@ -159,7 +159,9 @@ impl RowsInner for LibsqlRows { Ok(row) } +} +impl ColumnsInner for LibsqlRows { fn column_count(&self) -> i32 { self.0.column_count() } @@ -180,20 +182,22 @@ impl RowInner for LibsqlRow { self.0.get_value(idx) } - fn 
column_name(&self, idx: i32) -> Option<&str> { - self.0.column_name(idx) - } - fn column_str(&self, idx: i32) -> Result<&str> { self.0.get::<&str>(idx) } +} + +impl ColumnsInner for LibsqlRow { + fn column_name(&self, idx: i32) -> Option<&str> { + self.0.column_name(idx) + } fn column_type(&self, idx: i32) -> Result { self.0.column_type(idx).map(ValueType::from) } - fn column_count(&self) -> usize { - self.0.stmt.column_count() + fn column_count(&self) -> i32 { + self.0.stmt.column_count() as i32 } } diff --git a/libsql/src/local/rows.rs b/libsql/src/local/rows.rs index 7eb52d461b..4d4e622c75 100644 --- a/libsql/src/local/rows.rs +++ b/libsql/src/local/rows.rs @@ -1,6 +1,6 @@ use crate::local::{Connection, Statement}; use crate::params::Params; -use crate::rows::{RowInner, RowsInner}; +use crate::rows::{ColumnsInner, RowInner, RowsInner}; use crate::{errors, Error, Result}; use crate::{Value, ValueRef}; use libsql_sys::ValueType; @@ -213,7 +213,9 @@ impl RowsInner for BatchedRows { Ok(None) } } +} +impl ColumnsInner for BatchedRows { fn column_count(&self) -> i32 { self.cols.len() as i32 } @@ -244,10 +246,6 @@ impl RowInner for BatchedRow { .ok_or(Error::InvalidColumnIndex) } - fn column_name(&self, idx: i32) -> Option<&str> { - self.cols.get(idx as usize).map(|c| c.0.as_str()) - } - fn column_str(&self, idx: i32) -> Result<&str> { self.row .get(idx as usize) @@ -258,9 +256,15 @@ impl RowInner for BatchedRow { .ok_or(Error::InvalidColumnType) }) } +} + +impl ColumnsInner for BatchedRow { + fn column_name(&self, idx: i32) -> Option<&str> { + self.cols.get(idx as usize).map(|c| c.0.as_str()) + } - fn column_count(&self) -> usize { - self.cols.len() + fn column_count(&self) -> i32 { + self.cols.len() as i32 } fn column_type(&self, idx: i32) -> Result { diff --git a/libsql/src/local/statement.rs b/libsql/src/local/statement.rs index 70116a152e..c28a66f18f 100644 --- a/libsql/src/local/statement.rs +++ b/libsql/src/local/statement.rs @@ -250,15 +250,15 @@ impl Statement 
{ /// sure that current statement has already been stepped once before /// calling this method. pub fn column_names(&self) -> Vec<&str> { - let n = self.column_count(); - let mut cols = Vec::with_capacity(n); - for i in 0..n { - let s = self.column_name(i); - if let Some(s) = s { - cols.push(s); - } - } - cols + let n = self.column_count(); + let mut cols = Vec::with_capacity(n); + for i in 0..n { + let s = self.column_name(i); + if let Some(s) = s { + cols.push(s); + } + } + cols } /// Return the number of columns in the result set returned by the prepared @@ -314,12 +314,11 @@ impl Statement { /// the specified `name`. pub fn column_index(&self, name: &str) -> Result { let bytes = name.as_bytes(); - let n = self.column_count() as i32; + let n = self.column_count(); for i in 0..n { // Note: `column_name` is only fallible if `i` is out of bounds, // which we've already checked. let col_name = self - .inner .column_name(i) .ok_or_else(|| Error::InvalidColumnName(name.to_string()))?; if bytes.eq_ignore_ascii_case(col_name.as_bytes()) { diff --git a/libsql/src/replication/connection.rs b/libsql/src/replication/connection.rs index c82f523559..c720838798 100644 --- a/libsql/src/replication/connection.rs +++ b/libsql/src/replication/connection.rs @@ -11,7 +11,7 @@ use parking_lot::Mutex; use crate::parser; use crate::parser::StmtKind; -use crate::rows::{RowInner, RowsInner}; +use crate::rows::{ColumnsInner, RowInner, RowsInner}; use crate::statement::Stmt; use crate::transaction::Tx; use crate::{ @@ -780,7 +780,9 @@ impl RowsInner for RemoteRows { let row = RemoteRow(values, self.0.column_descriptions.clone()); Ok(Some(row).map(Box::new).map(|inner| Row { inner })) } +} +impl ColumnsInner for RemoteRows { fn column_count(&self) -> i32 { self.0.column_descriptions.len() as i32 } @@ -813,10 +815,6 @@ impl RowInner for RemoteRow { .ok_or(Error::InvalidColumnIndex) } - fn column_name(&self, idx: i32) -> Option<&str> { - self.1.get(idx as usize).map(|s| s.name.as_str()) - } - 
fn column_str(&self, idx: i32) -> Result<&str> { let value = self.0.get(idx as usize).ok_or(Error::InvalidColumnIndex)?; @@ -825,6 +823,12 @@ impl RowInner for RemoteRow { _ => Err(Error::InvalidColumnType), } } +} + +impl ColumnsInner for RemoteRow { + fn column_name(&self, idx: i32) -> Option<&str> { + self.1.get(idx as usize).map(|s| s.name.as_str()) + } fn column_type(&self, idx: i32) -> Result { let col = self.1.get(idx as usize).unwrap(); @@ -835,8 +839,8 @@ impl RowInner for RemoteRow { .ok_or(Error::InvalidColumnType) } - fn column_count(&self) -> usize { - self.1.len() + fn column_count(&self) -> i32 { + self.1.len() as i32 } } diff --git a/libsql/src/rows.rs b/libsql/src/rows.rs index b97aeac203..a10d82b827 100644 --- a/libsql/src/rows.rs +++ b/libsql/src/rows.rs @@ -38,14 +38,8 @@ impl Column<'_> { } #[async_trait::async_trait] -pub(crate) trait RowsInner { +pub(crate) trait RowsInner: ColumnsInner { async fn next(&mut self) -> Result>; - - fn column_count(&self) -> i32; - - fn column_name(&self, idx: i32) -> Option<&str>; - - fn column_type(&self, idx: i32) -> Result; } /// A set of rows returned from a connection. @@ -131,7 +125,7 @@ impl Row { } /// Get the count of columns in this set of rows. 
- pub fn column_count(&self) -> usize { + pub fn column_count(&self) -> i32 { self.inner.column_count() } @@ -284,12 +278,15 @@ where } impl Sealed for Option {} -pub(crate) trait RowInner: fmt::Debug { - fn column_value(&self, idx: i32) -> Result; - fn column_str(&self, idx: i32) -> Result<&str>; +pub(crate) trait ColumnsInner { fn column_name(&self, idx: i32) -> Option<&str>; fn column_type(&self, idx: i32) -> Result; - fn column_count(&self) -> usize; + fn column_count(&self) -> i32; +} + +pub(crate) trait RowInner: ColumnsInner + fmt::Debug { + fn column_value(&self, idx: i32) -> Result; + fn column_str(&self, idx: i32) -> Result<&str>; } mod sealed { From a68f042914cd637904123c7375e297a00ac5cecb Mon Sep 17 00:00:00 2001 From: Lucio Franco Date: Fri, 2 Aug 2024 09:26:28 -0700 Subject: [PATCH 02/50] libsql: release v0.5.0 --- Cargo.lock | 12 ++++++------ Cargo.toml | 2 +- libsql-ffi/Cargo.toml | 2 +- libsql-replication/Cargo.toml | 4 ++-- libsql-sys/Cargo.toml | 4 ++-- libsql/Cargo.toml | 8 ++++---- vendored/rusqlite/Cargo.toml | 4 ++-- vendored/sqlite3-parser/Cargo.toml | 2 +- 8 files changed, 19 insertions(+), 19 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 04ae728065..17cfc0e090 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3430,7 +3430,7 @@ dependencies = [ [[package]] name = "libsql" -version = "0.5.0-alpha.2" +version = "0.5.0" dependencies = [ "anyhow", "async-stream", @@ -3488,7 +3488,7 @@ dependencies = [ [[package]] name = "libsql-ffi" -version = "0.3.0" +version = "0.4.0" dependencies = [ "bindgen 0.66.1", "cc", @@ -3508,7 +3508,7 @@ dependencies = [ [[package]] name = "libsql-rusqlite" -version = "0.31.0" +version = "0.32.0" dependencies = [ "bencher", "bitflags 2.6.0", @@ -3631,7 +3631,7 @@ dependencies = [ [[package]] name = "libsql-sqlite3-parser" -version = "0.12.0" +version = "0.13.0" dependencies = [ "bitflags 2.6.0", "cc", @@ -3687,7 +3687,7 @@ dependencies = [ [[package]] name = "libsql-sys" -version = "0.6.0" +version = "0.7.0" 
dependencies = [ "bytes", "libsql-ffi", @@ -3768,7 +3768,7 @@ dependencies = [ [[package]] name = "libsql_replication" -version = "0.4.0" +version = "0.5.0" dependencies = [ "aes", "arbitrary", diff --git a/Cargo.toml b/Cargo.toml index 92487ecdd0..9381fb83f3 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -34,7 +34,7 @@ codegen-units = 1 panic = "unwind" [workspace.dependencies] -rusqlite = { package = "libsql-rusqlite", path = "vendored/rusqlite", version = "0.31", default-features = false, features = [ +rusqlite = { package = "libsql-rusqlite", path = "vendored/rusqlite", version = "0.32", default-features = false, features = [ "libsql-experimental", "column_decltype", "load_extension", diff --git a/libsql-ffi/Cargo.toml b/libsql-ffi/Cargo.toml index 9b5cbced11..ef9ade1726 100644 --- a/libsql-ffi/Cargo.toml +++ b/libsql-ffi/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "libsql-ffi" -version = "0.3.0" +version = "0.4.0" edition = "2021" build = "build.rs" license = "MIT" diff --git a/libsql-replication/Cargo.toml b/libsql-replication/Cargo.toml index 56f00d7a7d..d2a9431cba 100644 --- a/libsql-replication/Cargo.toml +++ b/libsql-replication/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "libsql_replication" -version = "0.4.0" +version = "0.5.0" edition = "2021" description = "libSQL replication protocol" repository = "https://github.com/tursodatabase/libsql" @@ -11,7 +11,7 @@ license = "MIT" [dependencies] tonic = { version = "0.11", features = ["tls"] } prost = "0.12" -libsql-sys = { version = "0.6", path = "../libsql-sys", default-features = false, features = ["wal", "rusqlite", "api"] } +libsql-sys = { version = "0.7", path = "../libsql-sys", default-features = false, features = ["wal", "rusqlite", "api"] } rusqlite = { workspace = true } parking_lot = "0.12.1" bytes = { version = "1.5.0", features = ["serde"] } diff --git a/libsql-sys/Cargo.toml b/libsql-sys/Cargo.toml index 26dd091ea9..8351012d9f 100644 --- a/libsql-sys/Cargo.toml +++ b/libsql-sys/Cargo.toml @@ -1,6 
+1,6 @@ [package] name = "libsql-sys" -version = "0.6.0" +version = "0.7.0" edition = "2021" license = "MIT" description = "Native bindings to libSQL" @@ -12,7 +12,7 @@ categories = ["external-ffi-bindings"] [dependencies] bytes = "1.5.0" -libsql-ffi = { version = "0.3", path = "../libsql-ffi/" } +libsql-ffi = { version = "0.4", path = "../libsql-ffi/" } once_cell = "1.18.0" rusqlite = { workspace = true, features = ["trace"], optional = true } tracing = "0.1.37" diff --git a/libsql/Cargo.toml b/libsql/Cargo.toml index fa89cc68ad..efae2abea3 100644 --- a/libsql/Cargo.toml +++ b/libsql/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "libsql" -version = "0.5.0-alpha.2" +version = "0.5.0" edition = "2021" description = "libSQL library: the main gateway for interacting with the database" repository = "https://github.com/tursodatabase/libsql" @@ -11,7 +11,7 @@ tracing = { version = "0.1.37", default-features = false } thiserror = "1.0.40" futures = { version = "0.3.28", optional = true } -libsql-sys = { version = "0.6", path = "../libsql-sys", optional = true } +libsql-sys = { version = "0.7", path = "../libsql-sys", optional = true } libsql-hrana = { version = "0.2", path = "../libsql-hrana", optional = true } tokio = { version = "1.29.1", features = ["sync"], optional = true } tokio-util = { version = "0.7", features = ["io-util", "codec"], optional = true } @@ -37,10 +37,10 @@ tower-http = { version = "0.4.4", features = ["trace", "set-header", "util"], op http = { version = "0.2", optional = true } zerocopy = { version = "0.7.28", optional = true } -sqlite3-parser = { package = "libsql-sqlite3-parser", path = "../vendored/sqlite3-parser", version = "0.12", optional = true } +sqlite3-parser = { package = "libsql-sqlite3-parser", path = "../vendored/sqlite3-parser", version = "0.13", optional = true } fallible-iterator = { version = "0.3", optional = true } -libsql_replication = { version = "0.4", path = "../libsql-replication", optional = true } +libsql_replication = { 
version = "0.5", path = "../libsql-replication", optional = true } async-stream = { version = "0.3.5", optional = true } [dev-dependencies] diff --git a/vendored/rusqlite/Cargo.toml b/vendored/rusqlite/Cargo.toml index 2d332f3279..d9fbcc525e 100644 --- a/vendored/rusqlite/Cargo.toml +++ b/vendored/rusqlite/Cargo.toml @@ -1,7 +1,7 @@ [package] name = "libsql-rusqlite" # Note: Update version in README.md when you change this. -version = "0.31.0" +version = "0.32.0" authors = ["The rusqlite developers"] edition = "2018" description = "Ergonomic wrapper for SQLite (libsql fork)" @@ -109,7 +109,7 @@ fallible-iterator = "0.2" fallible-streaming-iterator = "0.1" uuid = { version = "1.0", optional = true } smallvec = "1.6.1" -libsql-ffi = { version = "0.3", path = "../../libsql-ffi" } +libsql-ffi = { version = "0.4", path = "../../libsql-ffi" } [dev-dependencies] doc-comment = "0.3" diff --git a/vendored/sqlite3-parser/Cargo.toml b/vendored/sqlite3-parser/Cargo.toml index 5ed9e31f4d..0381ac1d99 100644 --- a/vendored/sqlite3-parser/Cargo.toml +++ b/vendored/sqlite3-parser/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "libsql-sqlite3-parser" -version = "0.12.0" +version = "0.13.0" edition = "2021" authors = ["gwenn"] description = "SQL parser (as understood by SQLite) (libsql fork)" From 0917f84cde2d272141eaa79d933ed0736fb668e5 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Fri, 2 Aug 2024 23:54:55 +0200 Subject: [PATCH 03/50] introduce NamespaceConfigurator --- .../src/namespace/configurator/mod.rs | 56 ++++++ .../src/namespace/configurator/primary.rs | 128 ++++++++++++ .../src/namespace/configurator/replica.rs | 190 ++++++++++++++++++ libsql-server/src/namespace/mod.rs | 17 +- 4 files changed, 383 insertions(+), 8 deletions(-) create mode 100644 libsql-server/src/namespace/configurator/mod.rs create mode 100644 libsql-server/src/namespace/configurator/primary.rs create mode 100644 libsql-server/src/namespace/configurator/replica.rs diff --git 
a/libsql-server/src/namespace/configurator/mod.rs b/libsql-server/src/namespace/configurator/mod.rs new file mode 100644 index 0000000000..0caa1de149 --- /dev/null +++ b/libsql-server/src/namespace/configurator/mod.rs @@ -0,0 +1,56 @@ +use std::pin::Pin; + +use futures::Future; + +use super::broadcasters::BroadcasterHandle; +use super::meta_store::MetaStoreHandle; +use super::{NamespaceConfig, NamespaceName, NamespaceStore, ResetCb, ResolveNamespacePathFn, RestoreOption}; + +mod replica; +mod primary; + +type DynConfigurator = Box; + +#[derive(Default)] +struct NamespaceConfigurators { + replica_configurator: Option, + primary_configurator: Option, + schema_configurator: Option, +} + +impl NamespaceConfigurators { + pub fn with_primary( + &mut self, + c: impl ConfigureNamespace + Send + Sync + 'static, + ) -> &mut Self { + self.primary_configurator = Some(Box::new(c)); + self + } + + pub fn with_replica( + &mut self, + c: impl ConfigureNamespace + Send + Sync + 'static, + ) -> &mut Self { + self.replica_configurator = Some(Box::new(c)); + self + } + + pub fn with_schema(&mut self, c: impl ConfigureNamespace + Send + Sync + 'static) -> &mut Self { + self.schema_configurator = Some(Box::new(c)); + self + } +} + +pub trait ConfigureNamespace { + fn setup<'a>( + &'a self, + ns_config: &'a NamespaceConfig, + db_config: MetaStoreHandle, + restore_option: RestoreOption, + name: &'a NamespaceName, + reset: ResetCb, + resolve_attach_path: ResolveNamespacePathFn, + store: NamespaceStore, + broadcaster: BroadcasterHandle, + ) -> Pin> + Send + 'a>>; +} diff --git a/libsql-server/src/namespace/configurator/primary.rs b/libsql-server/src/namespace/configurator/primary.rs new file mode 100644 index 0000000000..f28d288a97 --- /dev/null +++ b/libsql-server/src/namespace/configurator/primary.rs @@ -0,0 +1,128 @@ +use std::sync::atomic::{AtomicBool, Ordering}; +use std::{path::Path, pin::Pin, sync::Arc}; + +use futures::prelude::Future; +use tokio::task::JoinSet; + +use 
crate::connection::MakeConnection; +use crate::database::{Database, PrimaryDatabase}; +use crate::namespace::{Namespace, NamespaceConfig, NamespaceName, NamespaceStore, ResetCb, ResolveNamespacePathFn, RestoreOption}; +use crate::namespace::meta_store::MetaStoreHandle; +use crate::namespace::broadcasters::BroadcasterHandle; +use crate::run_periodic_checkpoint; +use crate::schema::{has_pending_migration_task, setup_migration_table}; + +use super::ConfigureNamespace; + +pub struct PrimaryConfigurator; + +impl ConfigureNamespace for PrimaryConfigurator { + fn setup<'a>( + &'a self, + config: &'a NamespaceConfig, + meta_store_handle: MetaStoreHandle, + restore_option: RestoreOption, + name: &'a NamespaceName, + _reset: ResetCb, + resolve_attach_path: ResolveNamespacePathFn, + _store: NamespaceStore, + broadcaster: BroadcasterHandle, + ) -> Pin> + Send + 'a>> + { + Box::pin(async move { + let db_path: Arc = config.base_path.join("dbs").join(name.as_str()).into(); + let fresh_namespace = !db_path.try_exists()?; + // FIXME: make that truly atomic. 
explore the idea of using temp directories, and it's implications + match try_new_primary( + config, + name.clone(), + meta_store_handle, + restore_option, + resolve_attach_path, + db_path.clone(), + broadcaster, + ) + .await + { + Ok(this) => Ok(this), + Err(e) if fresh_namespace => { + tracing::error!("an error occured while deleting creating namespace, cleaning..."); + if let Err(e) = tokio::fs::remove_dir_all(&db_path).await { + tracing::error!("failed to remove dirty namespace directory: {e}") + } + Err(e) + } + Err(e) => Err(e), + } + }) + } +} + +#[tracing::instrument(skip_all, fields(namespace))] +async fn try_new_primary( + ns_config: &NamespaceConfig, + namespace: NamespaceName, + meta_store_handle: MetaStoreHandle, + restore_option: RestoreOption, + resolve_attach_path: ResolveNamespacePathFn, + db_path: Arc, + broadcaster: BroadcasterHandle, +) -> crate::Result { + let mut join_set = JoinSet::new(); + + tokio::fs::create_dir_all(&db_path).await?; + + let block_writes = Arc::new(AtomicBool::new(false)); + let (connection_maker, wal_wrapper, stats) = Namespace::make_primary_connection_maker( + ns_config, + &meta_store_handle, + &db_path, + &namespace, + restore_option, + block_writes.clone(), + &mut join_set, + resolve_attach_path, + broadcaster, + ) + .await?; + let connection_maker = Arc::new(connection_maker); + + if meta_store_handle.get().shared_schema_name.is_some() { + let block_writes = block_writes.clone(); + let conn = connection_maker.create().await?; + tokio::task::spawn_blocking(move || { + conn.with_raw(|conn| -> crate::Result<()> { + setup_migration_table(conn)?; + if has_pending_migration_task(conn)? 
{ + block_writes.store(true, Ordering::SeqCst); + } + Ok(()) + }) + }) + .await + .unwrap()?; + } + + if let Some(checkpoint_interval) = ns_config.checkpoint_interval { + join_set.spawn(run_periodic_checkpoint( + connection_maker.clone(), + checkpoint_interval, + namespace.clone(), + )); + } + + tracing::debug!("Done making new primary"); + + Ok(Namespace { + tasks: join_set, + db: Database::Primary(PrimaryDatabase { + wal_wrapper, + connection_maker, + block_writes, + }), + name: namespace, + stats, + db_config_store: meta_store_handle, + path: db_path.into(), + }) +} diff --git a/libsql-server/src/namespace/configurator/replica.rs b/libsql-server/src/namespace/configurator/replica.rs new file mode 100644 index 0000000000..4d3ca1dadf --- /dev/null +++ b/libsql-server/src/namespace/configurator/replica.rs @@ -0,0 +1,190 @@ +use std::pin::Pin; +use std::sync::Arc; + +use futures::Future; +use libsql_replication::rpc::replication::replication_log_client::ReplicationLogClient; +use tokio::task::JoinSet; + +use crate::connection::write_proxy::MakeWriteProxyConn; +use crate::connection::MakeConnection; +use crate::database::{Database, ReplicaDatabase}; +use crate::namespace::broadcasters::BroadcasterHandle; +use crate::namespace::meta_store::MetaStoreHandle; +use crate::namespace::{Namespace, RestoreOption}; +use crate::namespace::{ + make_stats, NamespaceConfig, NamespaceName, NamespaceStore, ResetCb, ResetOp, + ResolveNamespacePathFn, +}; +use crate::{DB_CREATE_TIMEOUT, DEFAULT_AUTO_CHECKPOINT}; + +use super::ConfigureNamespace; + +pub struct ReplicaConfigurator; + +impl ConfigureNamespace for ReplicaConfigurator { + fn setup<'a>( + &'a self, + config: &'a NamespaceConfig, + meta_store_handle: MetaStoreHandle, + restore_option: RestoreOption, + name: &'a NamespaceName, + reset: ResetCb, + resolve_attach_path: ResolveNamespacePathFn, + store: NamespaceStore, + broadcaster: BroadcasterHandle, + ) -> Pin> + Send + 'a>> + { + Box::pin(async move { + 
tracing::debug!("creating replica namespace"); + let db_path = config.base_path.join("dbs").join(name.as_str()); + let channel = config.channel.clone().expect("bad replica config"); + let uri = config.uri.clone().expect("bad replica config"); + + let rpc_client = ReplicationLogClient::with_origin(channel.clone(), uri.clone()); + let client = crate::replication::replicator_client::Client::new( + name.clone(), + rpc_client, + &db_path, + meta_store_handle.clone(), + store.clone(), + ) + .await?; + let applied_frame_no_receiver = client.current_frame_no_notifier.subscribe(); + let mut replicator = libsql_replication::replicator::Replicator::new( + client, + db_path.join("data"), + DEFAULT_AUTO_CHECKPOINT, + config.encryption_config.clone(), + ) + .await?; + + tracing::debug!("try perform handshake"); + // force a handshake now, to retrieve the primary's current replication index + match replicator.try_perform_handshake().await { + Err(libsql_replication::replicator::Error::Meta( + libsql_replication::meta::Error::LogIncompatible, + )) => { + tracing::error!( + "trying to replicate incompatible logs, reseting replica and nuking db dir" + ); + std::fs::remove_dir_all(&db_path).unwrap(); + return self.setup( + config, + meta_store_handle, + restore_option, + name, + reset, + resolve_attach_path, + store, + broadcaster, + ) + .await; + } + Err(e) => Err(e)?, + Ok(_) => (), + } + + tracing::debug!("done performing handshake"); + + let primary_current_replicatio_index = replicator.client_mut().primary_replication_index; + + let mut join_set = JoinSet::new(); + let namespace = name.clone(); + join_set.spawn(async move { + use libsql_replication::replicator::Error; + loop { + match replicator.run().await { + err @ Error::Fatal(_) => Err(err)?, + err @ Error::NamespaceDoesntExist => { + tracing::error!("namespace {namespace} doesn't exist, destroying..."); + (reset)(ResetOp::Destroy(namespace.clone())); + Err(err)?; + } + e @ Error::Injector(_) => { + 
tracing::error!("potential corruption detected while replicating, reseting replica: {e}"); + (reset)(ResetOp::Reset(namespace.clone())); + Err(e)?; + }, + Error::Meta(err) => { + use libsql_replication::meta::Error; + match err { + Error::LogIncompatible => { + tracing::error!("trying to replicate incompatible logs, reseting replica"); + (reset)(ResetOp::Reset(namespace.clone())); + Err(err)?; + } + Error::InvalidMetaFile + | Error::Io(_) + | Error::InvalidLogId + | Error::FailedToCommit(_) + | Error::InvalidReplicationPath + | Error::RequiresCleanDatabase => { + // We retry from last frame index? + tracing::warn!("non-fatal replication error, retrying from last commit index: {err}"); + }, + } + } + e @ (Error::Internal(_) + | Error::Client(_) + | Error::PrimaryHandshakeTimeout + | Error::NeedSnapshot) => { + tracing::warn!("non-fatal replication error, retrying from last commit index: {e}"); + }, + Error::NoHandshake => { + // not strictly necessary, but in case the handshake error goes uncaught, + // we reset the client state. + replicator.client_mut().reset_token(); + } + Error::SnapshotPending => unreachable!(), + } + } + }); + + let stats = make_stats( + &db_path, + &mut join_set, + meta_store_handle.clone(), + config.stats_sender.clone(), + name.clone(), + applied_frame_no_receiver.clone(), + config.encryption_config.clone(), + ) + .await?; + + let connection_maker = MakeWriteProxyConn::new( + db_path.clone(), + config.extensions.clone(), + channel.clone(), + uri.clone(), + stats.clone(), + broadcaster, + meta_store_handle.clone(), + applied_frame_no_receiver, + config.max_response_size, + config.max_total_response_size, + primary_current_replicatio_index, + config.encryption_config.clone(), + resolve_attach_path, + config.make_wal_manager.clone(), + ) + .await? 
+ .throttled( + config.max_concurrent_connections.clone(), + Some(DB_CREATE_TIMEOUT), + config.max_total_response_size, + config.max_concurrent_requests, + ); + + Ok(Namespace { + tasks: join_set, + db: Database::Replica(ReplicaDatabase { + connection_maker: Arc::new(connection_maker), + }), + name: name.clone(), + stats, + db_config_store: meta_store_handle, + path: db_path.into(), + }) + }) + } +} diff --git a/libsql-server/src/namespace/mod.rs b/libsql-server/src/namespace/mod.rs index 6e48e7f1d8..6a04b11fb8 100644 --- a/libsql-server/src/namespace/mod.rs +++ b/libsql-server/src/namespace/mod.rs @@ -1,11 +1,3 @@ -pub mod broadcasters; -mod fork; -pub mod meta_store; -mod name; -pub mod replication_wal; -mod schema_lock; -mod store; - use std::path::{Path, PathBuf}; use std::sync::atomic::{AtomicBool, Ordering}; use std::sync::{Arc, Weak}; @@ -57,6 +49,15 @@ pub use self::name::NamespaceName; use self::replication_wal::{make_replication_wal_wrapper, ReplicationWalWrapper}; pub use self::store::NamespaceStore; +pub mod broadcasters; +mod fork; +pub mod meta_store; +mod name; +pub mod replication_wal; +mod schema_lock; +mod store; +mod configurator; + pub type ResetCb = Box; pub type ResolveNamespacePathFn = Arc crate::Result> + Sync + Send + 'static>; From f9daa9e08f58efdebd52acb4d45435266bec34af Mon Sep 17 00:00:00 2001 From: ad hoc Date: Sat, 3 Aug 2024 00:00:45 +0200 Subject: [PATCH 04/50] add configurators to namespace store --- libsql-server/src/namespace/configurator/mod.rs | 2 +- libsql-server/src/namespace/store.rs | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/libsql-server/src/namespace/configurator/mod.rs b/libsql-server/src/namespace/configurator/mod.rs index 0caa1de149..a692f75652 100644 --- a/libsql-server/src/namespace/configurator/mod.rs +++ b/libsql-server/src/namespace/configurator/mod.rs @@ -12,7 +12,7 @@ mod primary; type DynConfigurator = Box; #[derive(Default)] -struct NamespaceConfigurators { +pub(crate) struct 
NamespaceConfigurators { replica_configurator: Option, primary_configurator: Option, schema_configurator: Option, diff --git a/libsql-server/src/namespace/store.rs b/libsql-server/src/namespace/store.rs index e0147fc2e8..984a520154 100644 --- a/libsql-server/src/namespace/store.rs +++ b/libsql-server/src/namespace/store.rs @@ -19,6 +19,7 @@ use crate::namespace::{NamespaceBottomlessDbId, NamespaceBottomlessDbIdInit, Nam use crate::stats::Stats; use super::broadcasters::{BroadcasterHandle, BroadcasterRegistry}; +use super::configurator::NamespaceConfigurators; use super::meta_store::{MetaStore, MetaStoreHandle}; use super::schema_lock::SchemaLocksRegistry; use super::{Namespace, NamespaceConfig, ResetCb, ResetOp, ResolveNamespacePathFn, RestoreOption}; @@ -47,6 +48,7 @@ pub struct NamespaceStoreInner { pub config: NamespaceConfig, schema_locks: SchemaLocksRegistry, broadcasters: BroadcasterRegistry, + configurators: NamespaceConfigurators, } impl NamespaceStore { @@ -90,6 +92,7 @@ impl NamespaceStore { config, schema_locks: Default::default(), broadcasters: Default::default(), + configurators: NamespaceConfigurators::default(), }), }) } From 8b377a6e06dfc051bedb68976577adb8de339f40 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Sat, 3 Aug 2024 21:18:51 +0200 Subject: [PATCH 05/50] add shcema configurator --- .../src/namespace/configurator/schema.rs | 65 +++++++++++++++++++ 1 file changed, 65 insertions(+) create mode 100644 libsql-server/src/namespace/configurator/schema.rs diff --git a/libsql-server/src/namespace/configurator/schema.rs b/libsql-server/src/namespace/configurator/schema.rs new file mode 100644 index 0000000000..864b75239f --- /dev/null +++ b/libsql-server/src/namespace/configurator/schema.rs @@ -0,0 +1,65 @@ +use std::sync::{atomic::AtomicBool, Arc}; + +use futures::prelude::Future; +use tokio::task::JoinSet; + +use crate::database::{Database, SchemaDatabase}; +use crate::namespace::meta_store::MetaStoreHandle; +use crate::namespace::{ + Namespace, 
NamespaceConfig, NamespaceName, NamespaceStore, + ResetCb, ResolveNamespacePathFn, RestoreOption, +}; +use crate::namespace::broadcasters::BroadcasterHandle; + +use super::ConfigureNamespace; + +pub struct SchemaConfigurator; + +impl ConfigureNamespace for SchemaConfigurator { + fn setup<'a>( + &'a self, + ns_config: &'a NamespaceConfig, + db_config: MetaStoreHandle, + restore_option: RestoreOption, + name: &'a NamespaceName, + _reset: ResetCb, + resolve_attach_path: ResolveNamespacePathFn, + _store: NamespaceStore, + broadcaster: BroadcasterHandle, + ) -> std::pin::Pin> + Send + 'a>> { + Box::pin(async move { + let mut join_set = JoinSet::new(); + let db_path = ns_config.base_path.join("dbs").join(name.as_str()); + + tokio::fs::create_dir_all(&db_path).await?; + + let (connection_maker, wal_manager, stats) = Namespace::make_primary_connection_maker( + ns_config, + &db_config, + &db_path, + &name, + restore_option, + Arc::new(AtomicBool::new(false)), // this is always false for schema + &mut join_set, + resolve_attach_path, + broadcaster, + ) + .await?; + + Ok(Namespace { + db: Database::Schema(SchemaDatabase::new( + ns_config.migration_scheduler.clone(), + name.clone(), + connection_maker, + wal_manager, + db_config.clone(), + )), + name: name.clone(), + tasks: join_set, + stats, + db_config_store: db_config.clone(), + path: db_path.into(), + }) + }) + } +} From 978dd7147d0304397f261d918c70c2806eff82a5 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Sat, 3 Aug 2024 21:19:00 +0200 Subject: [PATCH 06/50] instanciate namesapces from configurators --- .../src/namespace/configurator/mod.rs | 25 +- libsql-server/src/namespace/fork.rs | 26 +- libsql-server/src/namespace/mod.rs | 374 +----------------- libsql-server/src/namespace/store.rs | 76 ++-- 4 files changed, 74 insertions(+), 427 deletions(-) diff --git a/libsql-server/src/namespace/configurator/mod.rs b/libsql-server/src/namespace/configurator/mod.rs index a692f75652..d3cd390b34 100644 --- 
a/libsql-server/src/namespace/configurator/mod.rs +++ b/libsql-server/src/namespace/configurator/mod.rs @@ -8,14 +8,19 @@ use super::{NamespaceConfig, NamespaceName, NamespaceStore, ResetCb, ResolveName mod replica; mod primary; +mod schema; -type DynConfigurator = Box; +pub use replica::ReplicaConfigurator; +pub use primary::PrimaryConfigurator; +pub use schema::SchemaConfigurator; + +type DynConfigurator = dyn ConfigureNamespace + Send + Sync + 'static; #[derive(Default)] pub(crate) struct NamespaceConfigurators { - replica_configurator: Option, - primary_configurator: Option, - schema_configurator: Option, + replica_configurator: Option>, + primary_configurator: Option>, + schema_configurator: Option>, } impl NamespaceConfigurators { @@ -39,6 +44,18 @@ impl NamespaceConfigurators { self.schema_configurator = Some(Box::new(c)); self } + + pub fn configure_schema(&self) -> crate::Result<&DynConfigurator> { + self.schema_configurator.as_deref().ok_or_else(|| todo!()) + } + + pub fn configure_primary(&self) -> crate::Result<&DynConfigurator> { + self.primary_configurator.as_deref().ok_or_else(|| todo!()) + } + + pub fn configure_replica(&self) -> crate::Result<&DynConfigurator> { + self.replica_configurator.as_deref().ok_or_else(|| todo!()) + } } pub trait ConfigureNamespace { diff --git a/libsql-server/src/namespace/fork.rs b/libsql-server/src/namespace/fork.rs index dfa053b43d..f25bf7a9a9 100644 --- a/libsql-server/src/namespace/fork.rs +++ b/libsql-server/src/namespace/fork.rs @@ -12,14 +12,12 @@ use tokio::io::{AsyncSeekExt, AsyncWriteExt}; use tokio::time::Duration; use tokio_stream::StreamExt; -use crate::namespace::ResolveNamespacePathFn; use crate::replication::primary::frame_stream::FrameStream; use crate::replication::{LogReadError, ReplicationLogger}; use crate::{BLOCKING_RT, LIBSQL_PAGE_SIZE}; -use super::broadcasters::BroadcasterHandle; use super::meta_store::MetaStoreHandle; -use super::{Namespace, NamespaceConfig, NamespaceName, NamespaceStore, 
RestoreOption}; +use super::{NamespaceName, NamespaceStore, RestoreOption}; type Result = crate::Result; @@ -54,16 +52,13 @@ async fn write_frame(frame: &FrameBorrowed, temp_file: &mut tokio::fs::File) -> Ok(()) } -pub struct ForkTask<'a> { +pub struct ForkTask { pub base_path: Arc, pub logger: Arc, pub to_namespace: NamespaceName, pub to_config: MetaStoreHandle, pub restore_to: Option, - pub ns_config: &'a NamespaceConfig, - pub resolve_attach: ResolveNamespacePathFn, pub store: NamespaceStore, - pub broadcaster: BroadcasterHandle, } pub struct PointInTimeRestore { @@ -71,7 +66,7 @@ pub struct PointInTimeRestore { pub replicator_options: bottomless::replicator::Options, } -impl<'a> ForkTask<'a> { +impl ForkTask { pub async fn fork(self) -> Result { let base_path = self.base_path.clone(); let dest_namespace = self.to_namespace.clone(); @@ -105,18 +100,9 @@ impl<'a> ForkTask<'a> { let dest_path = self.base_path.join("dbs").join(self.to_namespace.as_str()); tokio::fs::rename(temp_dir.path(), dest_path).await?; - Namespace::from_config( - self.ns_config, - self.to_config.clone(), - RestoreOption::Latest, - &self.to_namespace, - Box::new(|_op| {}), - self.resolve_attach.clone(), - self.store.clone(), - self.broadcaster, - ) - .await - .map_err(|e| ForkError::CreateNamespace(Box::new(e))) + self.store.make_namespace(&self.to_namespace, self.to_config, RestoreOption::Latest) + .await + .map_err(|e| ForkError::CreateNamespace(Box::new(e))) } /// Restores the database state from a local log file. 
diff --git a/libsql-server/src/namespace/mod.rs b/libsql-server/src/namespace/mod.rs index 6a04b11fb8..41bb3ab9cc 100644 --- a/libsql-server/src/namespace/mod.rs +++ b/libsql-server/src/namespace/mod.rs @@ -1,5 +1,5 @@ use std::path::{Path, PathBuf}; -use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::atomic::AtomicBool; use std::sync::{Arc, Weak}; use anyhow::{Context as _, Error}; @@ -10,7 +10,6 @@ use chrono::NaiveDateTime; use enclose::enclose; use futures_core::{Future, Stream}; use hyper::Uri; -use libsql_replication::rpc::replication::replication_log_client::ReplicationLogClient; use libsql_sys::wal::Sqlite3WalManager; use libsql_sys::EncryptionConfig; use tokio::io::AsyncBufReadExt; @@ -25,20 +24,17 @@ use crate::auth::parse_jwt_keys; use crate::connection::config::DatabaseConfig; use crate::connection::connection_manager::InnerWalManager; use crate::connection::libsql::{open_conn, MakeLibSqlConn}; -use crate::connection::write_proxy::MakeWriteProxyConn; -use crate::connection::Connection; -use crate::connection::MakeConnection; +use crate::connection::{Connection as _, MakeConnection}; use crate::database::{ - Database, DatabaseKind, PrimaryConnection, PrimaryConnectionMaker, PrimaryDatabase, - ReplicaDatabase, SchemaDatabase, + Database, DatabaseKind, PrimaryConnection, PrimaryConnectionMaker, }; use crate::error::LoadDumpError; use crate::replication::script_backup_manager::ScriptBackupManager; use crate::replication::{FrameNo, ReplicationLogger}; -use crate::schema::{has_pending_migration_task, setup_migration_table, SchedulerHandle}; +use crate::schema::SchedulerHandle; use crate::stats::Stats; use crate::{ - run_periodic_checkpoint, StatsSender, BLOCKING_RT, DB_CREATE_TIMEOUT, DEFAULT_AUTO_CHECKPOINT, + StatsSender, BLOCKING_RT, DB_CREATE_TIMEOUT, DEFAULT_AUTO_CHECKPOINT, }; pub use fork::ForkError; @@ -101,54 +97,6 @@ pub struct Namespace { } impl Namespace { - async fn from_config( - ns_config: &NamespaceConfig, - db_config: 
MetaStoreHandle, - restore_option: RestoreOption, - name: &NamespaceName, - reset: ResetCb, - resolve_attach_path: ResolveNamespacePathFn, - store: NamespaceStore, - broadcaster: BroadcasterHandle, - ) -> crate::Result { - match ns_config.db_kind { - DatabaseKind::Primary if db_config.get().is_shared_schema => { - Self::new_schema( - ns_config, - name.clone(), - db_config, - restore_option, - resolve_attach_path, - broadcaster, - ) - .await - } - DatabaseKind::Primary => { - Self::new_primary( - ns_config, - name.clone(), - db_config, - restore_option, - resolve_attach_path, - broadcaster, - ) - .await - } - DatabaseKind::Replica => { - Self::new_replica( - ns_config, - name.clone(), - db_config, - reset, - resolve_attach_path, - store, - broadcaster, - ) - .await - } - } - } - pub(crate) fn name(&self) -> &NamespaceName { &self.name } @@ -248,40 +196,6 @@ impl Namespace { self.db_config_store.changed() } - async fn new_primary( - config: &NamespaceConfig, - name: NamespaceName, - meta_store_handle: MetaStoreHandle, - restore_option: RestoreOption, - resolve_attach_path: ResolveNamespacePathFn, - broadcaster: BroadcasterHandle, - ) -> crate::Result { - let db_path: Arc = config.base_path.join("dbs").join(name.as_str()).into(); - let fresh_namespace = !db_path.try_exists()?; - // FIXME: make that truly atomic. 
explore the idea of using temp directories, and it's implications - match Self::try_new_primary( - config, - name.clone(), - meta_store_handle, - restore_option, - resolve_attach_path, - db_path.clone(), - broadcaster, - ) - .await - { - Ok(this) => Ok(this), - Err(e) if fresh_namespace => { - tracing::error!("an error occured while deleting creating namespace, cleaning..."); - if let Err(e) = tokio::fs::remove_dir_all(&db_path).await { - tracing::error!("failed to remove dirty namespace directory: {e}") - } - Err(e) - } - Err(e) => Err(e), - } - } - #[tracing::instrument(skip_all)] async fn make_primary_connection_maker( ns_config: &NamespaceConfig, @@ -417,237 +331,6 @@ impl Namespace { Ok((connection_maker, wal_wrapper, stats)) } - #[tracing::instrument(skip_all, fields(namespace))] - async fn try_new_primary( - ns_config: &NamespaceConfig, - namespace: NamespaceName, - meta_store_handle: MetaStoreHandle, - restore_option: RestoreOption, - resolve_attach_path: ResolveNamespacePathFn, - db_path: Arc, - broadcaster: BroadcasterHandle, - ) -> crate::Result { - let mut join_set = JoinSet::new(); - - tokio::fs::create_dir_all(&db_path).await?; - - let block_writes = Arc::new(AtomicBool::new(false)); - let (connection_maker, wal_wrapper, stats) = Self::make_primary_connection_maker( - ns_config, - &meta_store_handle, - &db_path, - &namespace, - restore_option, - block_writes.clone(), - &mut join_set, - resolve_attach_path, - broadcaster, - ) - .await?; - let connection_maker = Arc::new(connection_maker); - - if meta_store_handle.get().shared_schema_name.is_some() { - let block_writes = block_writes.clone(); - let conn = connection_maker.create().await?; - tokio::task::spawn_blocking(move || { - conn.with_raw(|conn| -> crate::Result<()> { - setup_migration_table(conn)?; - if has_pending_migration_task(conn)? 
{ - block_writes.store(true, Ordering::SeqCst); - } - Ok(()) - }) - }) - .await - .unwrap()?; - } - - if let Some(checkpoint_interval) = ns_config.checkpoint_interval { - join_set.spawn(run_periodic_checkpoint( - connection_maker.clone(), - checkpoint_interval, - namespace.clone(), - )); - } - - tracing::debug!("Done making new primary"); - - Ok(Self { - tasks: join_set, - db: Database::Primary(PrimaryDatabase { - wal_wrapper, - connection_maker, - block_writes, - }), - name: namespace, - stats, - db_config_store: meta_store_handle, - path: db_path.into(), - }) - } - - #[tracing::instrument(skip_all, fields(name))] - #[async_recursion::async_recursion] - async fn new_replica( - config: &NamespaceConfig, - name: NamespaceName, - meta_store_handle: MetaStoreHandle, - reset: ResetCb, - resolve_attach_path: ResolveNamespacePathFn, - store: NamespaceStore, - broadcaster: BroadcasterHandle, - ) -> crate::Result { - tracing::debug!("creating replica namespace"); - let db_path = config.base_path.join("dbs").join(name.as_str()); - let channel = config.channel.clone().expect("bad replica config"); - let uri = config.uri.clone().expect("bad replica config"); - - let rpc_client = ReplicationLogClient::with_origin(channel.clone(), uri.clone()); - let client = crate::replication::replicator_client::Client::new( - name.clone(), - rpc_client, - &db_path, - meta_store_handle.clone(), - store.clone(), - ) - .await?; - let applied_frame_no_receiver = client.current_frame_no_notifier.subscribe(); - let mut replicator = libsql_replication::replicator::Replicator::new( - client, - db_path.join("data"), - DEFAULT_AUTO_CHECKPOINT, - config.encryption_config.clone(), - ) - .await?; - - tracing::debug!("try perform handshake"); - // force a handshake now, to retrieve the primary's current replication index - match replicator.try_perform_handshake().await { - Err(libsql_replication::replicator::Error::Meta( - libsql_replication::meta::Error::LogIncompatible, - )) => { - tracing::error!( - 
"trying to replicate incompatible logs, reseting replica and nuking db dir" - ); - std::fs::remove_dir_all(&db_path).unwrap(); - return Self::new_replica( - config, - name, - meta_store_handle, - reset, - resolve_attach_path, - store, - broadcaster, - ) - .await; - } - Err(e) => Err(e)?, - Ok(_) => (), - } - - tracing::debug!("done performing handshake"); - - let primary_current_replicatio_index = replicator.client_mut().primary_replication_index; - - let mut join_set = JoinSet::new(); - let namespace = name.clone(); - join_set.spawn(async move { - use libsql_replication::replicator::Error; - loop { - match replicator.run().await { - err @ Error::Fatal(_) => Err(err)?, - err @ Error::NamespaceDoesntExist => { - tracing::error!("namespace {namespace} doesn't exist, destroying..."); - (reset)(ResetOp::Destroy(namespace.clone())); - Err(err)?; - } - e @ Error::Injector(_) => { - tracing::error!("potential corruption detected while replicating, reseting replica: {e}"); - (reset)(ResetOp::Reset(namespace.clone())); - Err(e)?; - }, - Error::Meta(err) => { - use libsql_replication::meta::Error; - match err { - Error::LogIncompatible => { - tracing::error!("trying to replicate incompatible logs, reseting replica"); - (reset)(ResetOp::Reset(namespace.clone())); - Err(err)?; - } - Error::InvalidMetaFile - | Error::Io(_) - | Error::InvalidLogId - | Error::FailedToCommit(_) - | Error::InvalidReplicationPath - | Error::RequiresCleanDatabase => { - // We retry from last frame index? - tracing::warn!("non-fatal replication error, retrying from last commit index: {err}"); - }, - } - } - e @ (Error::Internal(_) - | Error::Client(_) - | Error::PrimaryHandshakeTimeout - | Error::NeedSnapshot) => { - tracing::warn!("non-fatal replication error, retrying from last commit index: {e}"); - }, - Error::NoHandshake => { - // not strictly necessary, but in case the handshake error goes uncaught, - // we reset the client state. 
- replicator.client_mut().reset_token(); - } - Error::SnapshotPending => unreachable!(), - } - } - }); - - let stats = make_stats( - &db_path, - &mut join_set, - meta_store_handle.clone(), - config.stats_sender.clone(), - name.clone(), - applied_frame_no_receiver.clone(), - config.encryption_config.clone(), - ) - .await?; - - let connection_maker = MakeWriteProxyConn::new( - db_path.clone(), - config.extensions.clone(), - channel.clone(), - uri.clone(), - stats.clone(), - broadcaster, - meta_store_handle.clone(), - applied_frame_no_receiver, - config.max_response_size, - config.max_total_response_size, - primary_current_replicatio_index, - config.encryption_config.clone(), - resolve_attach_path, - config.make_wal_manager.clone(), - ) - .await? - .throttled( - config.max_concurrent_connections.clone(), - Some(DB_CREATE_TIMEOUT), - config.max_total_response_size, - config.max_concurrent_requests, - ); - - Ok(Self { - tasks: join_set, - db: Database::Replica(ReplicaDatabase { - connection_maker: Arc::new(connection_maker), - }), - name, - stats, - db_config_store: meta_store_handle, - path: db_path.into(), - }) - } - async fn fork( ns_config: &NamespaceConfig, from_ns: &Namespace, @@ -655,9 +338,7 @@ impl Namespace { to_ns: NamespaceName, to_config: MetaStoreHandle, timestamp: Option, - resolve_attach: ResolveNamespacePathFn, store: NamespaceStore, - broadcaster: BroadcasterHandle, ) -> crate::Result { let from_config = from_config.get(); match ns_config.db_kind { @@ -696,10 +377,7 @@ impl Namespace { logger, restore_to, to_config, - ns_config, - resolve_attach, store, - broadcaster: broadcaster.handle(to_ns), }; let ns = fork_task.fork().await?; @@ -708,48 +386,6 @@ impl Namespace { DatabaseKind::Replica => Err(ForkError::ForkReplica.into()), } } - - async fn new_schema( - ns_config: &NamespaceConfig, - name: NamespaceName, - meta_store_handle: MetaStoreHandle, - restore_option: RestoreOption, - resolve_attach_path: ResolveNamespacePathFn, - broadcaster: 
BroadcasterHandle, - ) -> crate::Result { - let mut join_set = JoinSet::new(); - let db_path = ns_config.base_path.join("dbs").join(name.as_str()); - - tokio::fs::create_dir_all(&db_path).await?; - - let (connection_maker, wal_manager, stats) = Self::make_primary_connection_maker( - ns_config, - &meta_store_handle, - &db_path, - &name, - restore_option, - Arc::new(AtomicBool::new(false)), // this is always false for schema - &mut join_set, - resolve_attach_path, - broadcaster, - ) - .await?; - - Ok(Namespace { - db: Database::Schema(SchemaDatabase::new( - ns_config.migration_scheduler.clone(), - name.clone(), - connection_maker, - wal_manager, - meta_store_handle.clone(), - )), - name, - tasks: join_set, - stats, - db_config_store: meta_store_handle, - path: db_path.into(), - }) - } } pub struct NamespaceConfig { diff --git a/libsql-server/src/namespace/store.rs b/libsql-server/src/namespace/store.rs index 984a520154..5a94a7f8eb 100644 --- a/libsql-server/src/namespace/store.rs +++ b/libsql-server/src/namespace/store.rs @@ -13,8 +13,10 @@ use tokio_stream::wrappers::BroadcastStream; use crate::auth::Authenticated; use crate::broadcaster::BroadcastMsg; use crate::connection::config::DatabaseConfig; +use crate::database::DatabaseKind; use crate::error::Error; use crate::metrics::NAMESPACE_LOAD_LATENCY; +use crate::namespace::configurator::{PrimaryConfigurator, ReplicaConfigurator, SchemaConfigurator}; use crate::namespace::{NamespaceBottomlessDbId, NamespaceBottomlessDbIdInit, NamespaceName}; use crate::stats::Stats; @@ -82,6 +84,12 @@ impl NamespaceStore { .time_to_idle(Duration::from_secs(86400)) .build(); + let mut configurators = NamespaceConfigurators::default(); + configurators + .with_primary(PrimaryConfigurator) + .with_replica(ReplicaConfigurator) + .with_schema(SchemaConfigurator); + Ok(Self { inner: Arc::new(NamespaceStoreInner { store, @@ -92,7 +100,7 @@ impl NamespaceStore { config, schema_locks: Default::default(), broadcasters: Default::default(), - 
configurators: NamespaceConfigurators::default(), + configurators, }), }) } @@ -177,27 +185,17 @@ impl NamespaceStore { ns.destroy().await?; } - let handle = self.inner.metadata.handle(namespace.clone()); + let db_config = self.inner.metadata.handle(namespace.clone()); // destroy on-disk database Namespace::cleanup( &self.inner.config, &namespace, - &handle.get(), + &db_config.get(), false, NamespaceBottomlessDbIdInit::FetchFromConfig, ) .await?; - let ns = Namespace::from_config( - &self.inner.config, - handle, - restore_option, - &namespace, - self.make_reset_cb(), - self.resolve_attach_fn(), - self.clone(), - self.broadcaster(namespace.clone()), - ) - .await?; + let ns = self.make_namespace(&namespace, db_config, restore_option).await?; lock.replace(ns); @@ -304,9 +302,7 @@ impl NamespaceStore { to.clone(), handle.clone(), timestamp, - self.resolve_attach_fn(), self.clone(), - self.broadcaster(to), ) .await?; @@ -381,30 +377,42 @@ impl NamespaceStore { .clone() } + pub(crate) async fn make_namespace( + &self, + namespace: &NamespaceName, + config: MetaStoreHandle, + restore_option: RestoreOption, + ) -> crate::Result { + let configurator = match self.inner.config.db_kind { + DatabaseKind::Primary if config.get().is_shared_schema => { + self.inner.configurators.configure_schema()? 
+ } + DatabaseKind::Primary => self.inner.configurators.configure_primary()?, + DatabaseKind::Replica => self.inner.configurators.configure_replica()?, + }; + let ns = configurator.setup( + &self.inner.config, + config, + restore_option, + namespace, + self.make_reset_cb(), + self.resolve_attach_fn(), + self.clone(), + self.broadcaster(namespace.clone()), + ).await?; + + Ok(ns) + } + async fn load_namespace( &self, namespace: &NamespaceName, db_config: MetaStoreHandle, restore_option: RestoreOption, ) -> crate::Result { - let init = { - let namespace = namespace.clone(); - async move { - let ns = Namespace::from_config( - &self.inner.config, - db_config, - restore_option, - &namespace, - self.make_reset_cb(), - self.resolve_attach_fn(), - self.clone(), - self.broadcaster(namespace.clone()), - ) - .await?; - tracing::info!("loaded namespace: `{namespace}`"); - - Ok(Some(ns)) - } + let init = async { + let ns = self.make_namespace(namespace, db_config, restore_option).await?; + Ok(Some(ns)) }; let before_load = Instant::now(); From 907f2f9381783b09254e605473455c72e97cd40b Mon Sep 17 00:00:00 2001 From: ad hoc Date: Mon, 5 Aug 2024 11:14:40 +0200 Subject: [PATCH 07/50] pass configurators to NamespaceStore::new --- libsql-server/src/lib.rs | 5 +++ .../src/namespace/configurator/mod.rs | 38 ++++++++++++------- libsql-server/src/namespace/mod.rs | 2 +- libsql-server/src/namespace/store.rs | 10 +---- libsql-server/src/schema/scheduler.rs | 38 ++++++++++++++----- 5 files changed, 62 insertions(+), 31 deletions(-) diff --git a/libsql-server/src/lib.rs b/libsql-server/src/lib.rs index 5404a11108..3d816d6bc3 100644 --- a/libsql-server/src/lib.rs +++ b/libsql-server/src/lib.rs @@ -60,6 +60,7 @@ use utils::services::idle_shutdown::IdleShutdownKicker; use self::config::MetaStoreConfig; use self::connection::connection_manager::InnerWalManager; +use self::namespace::configurator::NamespaceConfigurators; use self::namespace::NamespaceStore; use self::net::AddrIncoming; use 
self::replication::script_backup_manager::{CommandHandler, ScriptBackupManager}; @@ -488,12 +489,16 @@ where meta_store_wal_manager, ) .await?; + + let configurators = NamespaceConfigurators::default(); + let namespace_store: NamespaceStore = NamespaceStore::new( db_kind.is_replica(), self.db_config.snapshot_at_shutdown, self.max_active_namespaces, ns_config, meta_store, + configurators, ) .await?; diff --git a/libsql-server/src/namespace/configurator/mod.rs b/libsql-server/src/namespace/configurator/mod.rs index d3cd390b34..a240c3e410 100644 --- a/libsql-server/src/namespace/configurator/mod.rs +++ b/libsql-server/src/namespace/configurator/mod.rs @@ -4,43 +4,55 @@ use futures::Future; use super::broadcasters::BroadcasterHandle; use super::meta_store::MetaStoreHandle; -use super::{NamespaceConfig, NamespaceName, NamespaceStore, ResetCb, ResolveNamespacePathFn, RestoreOption}; +use super::{ + NamespaceConfig, NamespaceName, NamespaceStore, ResetCb, ResolveNamespacePathFn, RestoreOption, +}; -mod replica; mod primary; +mod replica; mod schema; -pub use replica::ReplicaConfigurator; pub use primary::PrimaryConfigurator; +pub use replica::ReplicaConfigurator; pub use schema::SchemaConfigurator; type DynConfigurator = dyn ConfigureNamespace + Send + Sync + 'static; -#[derive(Default)] pub(crate) struct NamespaceConfigurators { replica_configurator: Option>, primary_configurator: Option>, schema_configurator: Option>, } +impl Default for NamespaceConfigurators { + fn default() -> Self { + Self::empty() + .with_primary(PrimaryConfigurator) + .with_replica(ReplicaConfigurator) + .with_schema(SchemaConfigurator) + } +} + impl NamespaceConfigurators { - pub fn with_primary( - &mut self, - c: impl ConfigureNamespace + Send + Sync + 'static, - ) -> &mut Self { + pub fn empty() -> Self { + Self { + replica_configurator: None, + primary_configurator: None, + schema_configurator: None, + } + } + + pub fn with_primary(mut self, c: impl ConfigureNamespace + Send + Sync + 'static) 
-> Self { self.primary_configurator = Some(Box::new(c)); self } - pub fn with_replica( - &mut self, - c: impl ConfigureNamespace + Send + Sync + 'static, - ) -> &mut Self { + pub fn with_replica(mut self, c: impl ConfigureNamespace + Send + Sync + 'static) -> Self { self.replica_configurator = Some(Box::new(c)); self } - pub fn with_schema(&mut self, c: impl ConfigureNamespace + Send + Sync + 'static) -> &mut Self { + pub fn with_schema(mut self, c: impl ConfigureNamespace + Send + Sync + 'static) -> Self { self.schema_configurator = Some(Box::new(c)); self } diff --git a/libsql-server/src/namespace/mod.rs b/libsql-server/src/namespace/mod.rs index 41bb3ab9cc..5ccda74c54 100644 --- a/libsql-server/src/namespace/mod.rs +++ b/libsql-server/src/namespace/mod.rs @@ -52,7 +52,7 @@ mod name; pub mod replication_wal; mod schema_lock; mod store; -mod configurator; +pub(crate) mod configurator; pub type ResetCb = Box; pub type ResolveNamespacePathFn = diff --git a/libsql-server/src/namespace/store.rs b/libsql-server/src/namespace/store.rs index 5a94a7f8eb..fbce8cd78b 100644 --- a/libsql-server/src/namespace/store.rs +++ b/libsql-server/src/namespace/store.rs @@ -16,7 +16,6 @@ use crate::connection::config::DatabaseConfig; use crate::database::DatabaseKind; use crate::error::Error; use crate::metrics::NAMESPACE_LOAD_LATENCY; -use crate::namespace::configurator::{PrimaryConfigurator, ReplicaConfigurator, SchemaConfigurator}; use crate::namespace::{NamespaceBottomlessDbId, NamespaceBottomlessDbIdInit, NamespaceName}; use crate::stats::Stats; @@ -54,12 +53,13 @@ pub struct NamespaceStoreInner { } impl NamespaceStore { - pub async fn new( + pub(crate) async fn new( allow_lazy_creation: bool, snapshot_at_shutdown: bool, max_active_namespaces: usize, config: NamespaceConfig, metadata: MetaStore, + configurators: NamespaceConfigurators, ) -> crate::Result { tracing::trace!("Max active namespaces: {max_active_namespaces}"); let store = Cache::::builder() @@ -84,12 +84,6 @@ impl 
NamespaceStore { .time_to_idle(Duration::from_secs(86400)) .build(); - let mut configurators = NamespaceConfigurators::default(); - configurators - .with_primary(PrimaryConfigurator) - .with_replica(ReplicaConfigurator) - .with_schema(SchemaConfigurator); - Ok(Self { inner: Arc::new(NamespaceStoreInner { store, diff --git a/libsql-server/src/schema/scheduler.rs b/libsql-server/src/schema/scheduler.rs index 17fdfb3143..17ce655064 100644 --- a/libsql-server/src/schema/scheduler.rs +++ b/libsql-server/src/schema/scheduler.rs @@ -808,6 +808,9 @@ mod test { use crate::connection::config::DatabaseConfig; use crate::database::DatabaseKind; + use crate::namespace::configurator::{ + NamespaceConfigurators, PrimaryConfigurator, SchemaConfigurator, + }; use crate::namespace::meta_store::{metastore_connection_maker, MetaStore}; use crate::namespace::{NamespaceConfig, RestoreOption}; use crate::schema::SchedulerHandle; @@ -826,9 +829,16 @@ mod test { .unwrap(); let (sender, mut receiver) = mpsc::channel(100); let config = make_config(sender.clone().into(), tmp.path()); - let store = NamespaceStore::new(false, false, 10, config, meta_store) - .await - .unwrap(); + let store = NamespaceStore::new( + false, + false, + 10, + config, + meta_store, + NamespaceConfigurators::default(), + ) + .await + .unwrap(); let mut scheduler = Scheduler::new(store.clone(), maker().unwrap()) .await .unwrap(); @@ -936,9 +946,16 @@ mod test { .unwrap(); let (sender, mut receiver) = mpsc::channel(100); let config = make_config(sender.clone().into(), tmp.path()); - let store = NamespaceStore::new(false, false, 10, config, meta_store) - .await - .unwrap(); + let store = NamespaceStore::new( + false, + false, + 10, + config, + meta_store, + NamespaceConfigurators::default(), + ) + .await + .unwrap(); let mut scheduler = Scheduler::new(store.clone(), maker().unwrap()) .await .unwrap(); @@ -1012,7 +1029,7 @@ mod test { .unwrap(); let (sender, _receiver) = mpsc::channel(100); let config = 
make_config(sender.clone().into(), tmp.path()); - let store = NamespaceStore::new(false, false, 10, config, meta_store) + let store = NamespaceStore::new(false, false, 10, config, meta_store, NamespaceConfigurators::default()) .await .unwrap(); @@ -1039,7 +1056,10 @@ mod test { .unwrap(); let (sender, mut receiver) = mpsc::channel(100); let config = make_config(sender.clone().into(), tmp.path()); - let store = NamespaceStore::new(false, false, 10, config, meta_store) + let configurators = NamespaceConfigurators::default() + .with_schema(SchemaConfigurator) + .with_primary(PrimaryConfigurator); + let store = NamespaceStore::new(false, false, 10, config, meta_store, configurators) .await .unwrap(); let mut scheduler = Scheduler::new(store.clone(), maker().unwrap()) @@ -1112,7 +1132,7 @@ mod test { .unwrap(); let (sender, _receiver) = mpsc::channel(100); let config = make_config(sender.clone().into(), tmp.path()); - let store = NamespaceStore::new(false, false, 10, config, meta_store) + let store = NamespaceStore::new(false, false, 10, config, meta_store, NamespaceConfigurators::default()) .await .unwrap(); let scheduler = Scheduler::new(store.clone(), maker().unwrap()) From fd03144bcc30e7b85f07abf4630ba9ea58a87d11 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Mon, 5 Aug 2024 15:26:34 +0200 Subject: [PATCH 08/50] decoupled namespace configurators --- libsql-server/src/error.rs | 2 +- libsql-server/src/lib.rs | 86 ++- .../src/namespace/{ => configurator}/fork.rs | 62 +- .../src/namespace/configurator/helpers.rs | 451 ++++++++++++++ .../src/namespace/configurator/mod.rs | 67 ++- .../src/namespace/configurator/primary.rs | 249 +++++--- .../src/namespace/configurator/replica.rs | 140 +++-- .../src/namespace/configurator/schema.rs | 71 ++- libsql-server/src/namespace/mod.rs | 551 +----------------- libsql-server/src/namespace/store.rs | 103 ++-- libsql-server/src/schema/scheduler.rs | 80 ++- 11 files changed, 1056 insertions(+), 806 deletions(-) rename 
libsql-server/src/namespace/{ => configurator}/fork.rs (77%) create mode 100644 libsql-server/src/namespace/configurator/helpers.rs diff --git a/libsql-server/src/error.rs b/libsql-server/src/error.rs index 371630abdf..9cd0b81485 100644 --- a/libsql-server/src/error.rs +++ b/libsql-server/src/error.rs @@ -4,7 +4,7 @@ use tonic::metadata::errors::InvalidMetadataValueBytes; use crate::{ auth::AuthError, - namespace::{ForkError, NamespaceName}, + namespace::{configurator::fork::ForkError, NamespaceName}, query_result_builder::QueryResultBuilderError, }; diff --git a/libsql-server/src/lib.rs b/libsql-server/src/lib.rs index 3d816d6bc3..8bd3ea4fac 100644 --- a/libsql-server/src/lib.rs +++ b/libsql-server/src/lib.rs @@ -46,7 +46,7 @@ use libsql_wal::registry::WalRegistry; use libsql_wal::storage::NoStorage; use libsql_wal::wal::LibsqlWalManager; use namespace::meta_store::MetaStoreHandle; -use namespace::{NamespaceConfig, NamespaceName}; +use namespace::NamespaceName; use net::Connector; use once_cell::sync::Lazy; use rusqlite::ffi::SQLITE_CONFIG_MALLOC; @@ -60,7 +60,7 @@ use utils::services::idle_shutdown::IdleShutdownKicker; use self::config::MetaStoreConfig; use self::connection::connection_manager::InnerWalManager; -use self::namespace::configurator::NamespaceConfigurators; +use self::namespace::configurator::{BaseNamespaceConfig, NamespaceConfigurators, PrimaryConfigurator, PrimaryExtraConfig, ReplicaConfigurator, SchemaConfigurator}; use self::namespace::NamespaceStore; use self::net::AddrIncoming; use self::replication::script_backup_manager::{CommandHandler, ScriptBackupManager}; @@ -425,11 +425,6 @@ where let user_auth_strategy = self.user_api_config.auth_strategy.clone(); let service_shutdown = Arc::new(Notify::new()); - let db_kind = if self.rpc_client_config.is_some() { - DatabaseKind::Replica - } else { - DatabaseKind::Primary - }; let scripted_backup = match self.db_config.snapshot_exec { Some(ref command) => { @@ -457,27 +452,6 @@ where // chose the wal 
backend let (make_wal_manager, registry_shutdown) = self.configure_wal_manager(&mut join_set)?; - let ns_config = NamespaceConfig { - db_kind, - base_path: self.path.clone(), - max_log_size: self.db_config.max_log_size, - max_log_duration: self.db_config.max_log_duration.map(Duration::from_secs_f32), - bottomless_replication: self.db_config.bottomless_replication.clone(), - extensions, - stats_sender: stats_sender.clone(), - max_response_size: self.db_config.max_response_size, - max_total_response_size: self.db_config.max_total_response_size, - checkpoint_interval: self.db_config.checkpoint_interval, - encryption_config: self.db_config.encryption_config.clone(), - max_concurrent_connections: Arc::new(Semaphore::new(self.max_concurrent_connections)), - scripted_backup, - max_concurrent_requests: self.db_config.max_concurrent_requests, - channel: channel.clone(), - uri: uri.clone(), - migration_scheduler: scheduler_sender.into(), - make_wal_manager, - }; - let (metastore_conn_maker, meta_store_wal_manager) = metastore_connection_maker(self.meta_store_config.bottomless.clone(), &self.path) .await?; @@ -490,15 +464,67 @@ where ) .await?; - let configurators = NamespaceConfigurators::default(); + let base_config = BaseNamespaceConfig { + base_path: self.path.clone(), + extensions, + stats_sender, + max_response_size: self.db_config.max_response_size, + max_total_response_size: self.db_config.max_total_response_size, + max_concurrent_connections: Arc::new(Semaphore::new(self.max_concurrent_connections)), + max_concurrent_requests: self.db_config.max_concurrent_requests, + }; + + let mut configurators = NamespaceConfigurators::default(); + + let db_kind = match channel.clone().zip(uri.clone()) { + // replica mode + Some((channel, uri)) => { + let replica_configurator = ReplicaConfigurator::new( + base_config, + channel, + uri, + make_wal_manager, + ); + configurators.with_replica(replica_configurator); + DatabaseKind::Replica + } + // primary mode + None => { + let 
primary_config = PrimaryExtraConfig { + max_log_size: self.db_config.max_log_size, + max_log_duration: self.db_config.max_log_duration.map(Duration::from_secs_f32), + bottomless_replication: self.db_config.bottomless_replication.clone(), + scripted_backup, + checkpoint_interval: self.db_config.checkpoint_interval, + }; + + let primary_configurator = PrimaryConfigurator::new( + base_config.clone(), + primary_config.clone(), + make_wal_manager.clone(), + ); + + let schema_configurator = SchemaConfigurator::new( + base_config.clone(), + primary_config, + make_wal_manager.clone(), + scheduler_sender.into(), + ); + + configurators.with_schema(schema_configurator); + configurators.with_primary(primary_configurator); + + DatabaseKind::Primary + }, + }; let namespace_store: NamespaceStore = NamespaceStore::new( db_kind.is_replica(), self.db_config.snapshot_at_shutdown, self.max_active_namespaces, - ns_config, meta_store, configurators, + db_kind, ) .await?; diff --git a/libsql-server/src/namespace/fork.rs b/libsql-server/src/namespace/configurator/fork.rs similarity index 77% rename from libsql-server/src/namespace/fork.rs rename to libsql-server/src/namespace/configurator/fork.rs index f25bf7a9a9..26a0b99b61 100644 --- a/libsql-server/src/namespace/fork.rs +++ b/libsql-server/src/namespace/configurator/fork.rs @@ -12,15 +12,71 @@ use tokio::io::{AsyncSeekExt, AsyncWriteExt}; use tokio::time::Duration; use tokio_stream::StreamExt; +use crate::database::Database; +use crate::namespace::meta_store::MetaStoreHandle; +use crate::namespace::{Namespace, NamespaceBottomlessDbId}; use crate::replication::primary::frame_stream::FrameStream; use crate::replication::{LogReadError, ReplicationLogger}; use crate::{BLOCKING_RT, LIBSQL_PAGE_SIZE}; -use super::meta_store::MetaStoreHandle; -use super::{NamespaceName, NamespaceStore, RestoreOption}; +use super::helpers::make_bottomless_options; +use super::{NamespaceName, NamespaceStore, PrimaryExtraConfig, RestoreOption}; type Result = 
crate::Result; +pub(super) async fn fork( + from_ns: &Namespace, + from_config: MetaStoreHandle, + to_ns: NamespaceName, + to_config: MetaStoreHandle, + timestamp: Option, + store: NamespaceStore, + primary_config: &PrimaryExtraConfig, + base_path: Arc, +) -> crate::Result { + let from_config = from_config.get(); + let bottomless_db_id = NamespaceBottomlessDbId::from_config(&from_config); + let restore_to = if let Some(timestamp) = timestamp { + if let Some(ref options) = primary_config.bottomless_replication { + Some(PointInTimeRestore { + timestamp, + replicator_options: make_bottomless_options( + options, + bottomless_db_id.clone(), + from_ns.name().clone(), + ), + }) + } else { + return Err(crate::Error::Fork(ForkError::BackupServiceNotConfigured)); + } + } else { + None + }; + + let logger = match &from_ns.db { + Database::Primary(db) => db.wal_wrapper.wrapper().logger(), + Database::Schema(db) => db.wal_wrapper.wrapper().logger(), + _ => { + return Err(crate::Error::Fork(ForkError::Internal(anyhow::Error::msg( + "Invalid source database type for fork", + )))); + } + }; + + let fork_task = ForkTask { + base_path, + to_namespace: to_ns.clone(), + logger, + restore_to, + to_config, + store, + }; + + let ns = fork_task.fork().await?; + + Ok(ns) +} + #[derive(Debug, thiserror::Error)] pub enum ForkError { #[error("internal error: {0}")] @@ -58,7 +114,7 @@ pub struct ForkTask { pub to_namespace: NamespaceName, pub to_config: MetaStoreHandle, pub restore_to: Option, - pub store: NamespaceStore, + pub store: NamespaceStore } pub struct PointInTimeRestore { diff --git a/libsql-server/src/namespace/configurator/helpers.rs b/libsql-server/src/namespace/configurator/helpers.rs new file mode 100644 index 0000000000..f43fa8a192 --- /dev/null +++ b/libsql-server/src/namespace/configurator/helpers.rs @@ -0,0 +1,451 @@ +use std::path::{Path, PathBuf}; +use std::sync::Weak; +use std::sync::{atomic::AtomicBool, Arc}; +use std::time::Duration; + +use anyhow::Context as _; +use 
bottomless::replicator::Options; +use bytes::Bytes; +use futures::Stream; +use libsql_sys::wal::Sqlite3WalManager; +use tokio::io::AsyncBufReadExt as _; +use tokio::sync::watch; +use tokio::task::JoinSet; +use tokio_util::io::StreamReader; +use enclose::enclose; + +use crate::connection::config::DatabaseConfig; +use crate::connection::connection_manager::InnerWalManager; +use crate::connection::libsql::{open_conn, MakeLibSqlConn}; +use crate::connection::{Connection as _, MakeConnection as _}; +use crate::error::LoadDumpError; +use crate::replication::{FrameNo, ReplicationLogger}; +use crate::stats::Stats; +use crate::namespace::{NamespaceBottomlessDbId, NamespaceBottomlessDbIdInit, NamespaceName, ResolveNamespacePathFn, RestoreOption}; +use crate::namespace::replication_wal::{make_replication_wal_wrapper, ReplicationWalWrapper}; +use crate::namespace::meta_store::MetaStoreHandle; +use crate::namespace::broadcasters::BroadcasterHandle; +use crate::database::{PrimaryConnection, PrimaryConnectionMaker}; +use crate::{StatsSender, BLOCKING_RT, DB_CREATE_TIMEOUT, DEFAULT_AUTO_CHECKPOINT}; + +use super::{BaseNamespaceConfig, PrimaryExtraConfig}; + +const WASM_TABLE_CREATE: &str = + "CREATE TABLE libsql_wasm_func_table (name text PRIMARY KEY, body text) WITHOUT ROWID;"; + +#[tracing::instrument(skip_all)] +pub(super) async fn make_primary_connection_maker( + primary_config: &PrimaryExtraConfig, + base_config: &BaseNamespaceConfig, + meta_store_handle: &MetaStoreHandle, + db_path: &Path, + name: &NamespaceName, + restore_option: RestoreOption, + block_writes: Arc, + join_set: &mut JoinSet>, + resolve_attach_path: ResolveNamespacePathFn, + broadcaster: BroadcasterHandle, + make_wal_manager: Arc InnerWalManager + Sync + Send + 'static>, +) -> crate::Result<(PrimaryConnectionMaker, ReplicationWalWrapper, Arc)> { + let db_config = meta_store_handle.get(); + let bottomless_db_id = NamespaceBottomlessDbId::from_config(&db_config); + // FIXME: figure out how to do it per-db + let mut 
is_dirty = { + let sentinel_path = db_path.join(".sentinel"); + if sentinel_path.try_exists()? { + true + } else { + tokio::fs::File::create(&sentinel_path).await?; + false + } + }; + + // FIXME: due to a bug in logger::checkpoint_db we call regular checkpointing code + // instead of our virtual WAL one. It's a bit tangled to fix right now, because + // we need WAL context for checkpointing, and WAL context needs the ReplicationLogger... + // So instead we checkpoint early, *before* bottomless gets initialized. That way + // we're sure bottomless won't try to back up any existing WAL frames and will instead + // treat the existing db file as the source of truth. + + let bottomless_replicator = match primary_config.bottomless_replication { + Some(ref options) => { + tracing::debug!("Checkpointing before initializing bottomless"); + crate::replication::primary::logger::checkpoint_db(&db_path.join("data"))?; + tracing::debug!("Checkpointed before initializing bottomless"); + let options = make_bottomless_options(options, bottomless_db_id, name.clone()); + let (replicator, did_recover) = + init_bottomless_replicator(db_path.join("data"), options, &restore_option) + .await?; + tracing::debug!("Completed init of bottomless replicator"); + is_dirty |= did_recover; + Some(replicator) + } + None => None, + }; + + tracing::debug!("Checking fresh db"); + let is_fresh_db = check_fresh_db(&db_path)?; + // switch frame-count checkpoint to time-based one + let auto_checkpoint = if primary_config.checkpoint_interval.is_some() { + 0 + } else { + DEFAULT_AUTO_CHECKPOINT + }; + + let logger = Arc::new(ReplicationLogger::open( + &db_path, + primary_config.max_log_size, + primary_config.max_log_duration, + is_dirty, + auto_checkpoint, + primary_config.scripted_backup.clone(), + name.clone(), + None, + )?); + + tracing::debug!("sending stats"); + + let stats = make_stats( + &db_path, + join_set, + meta_store_handle.clone(), + base_config.stats_sender.clone(), + name.clone(), + 
logger.new_frame_notifier.subscribe(), + ) + .await?; + + tracing::debug!("Making replication wal wrapper"); + let wal_wrapper = make_replication_wal_wrapper(bottomless_replicator, logger.clone()); + + tracing::debug!("Opening libsql connection"); + + let connection_maker = MakeLibSqlConn::new( + db_path.to_path_buf(), + wal_wrapper.clone(), + stats.clone(), + broadcaster, + meta_store_handle.clone(), + base_config.extensions.clone(), + base_config.max_response_size, + base_config.max_total_response_size, + auto_checkpoint, + logger.new_frame_notifier.subscribe(), + None, + block_writes, + resolve_attach_path, + make_wal_manager.clone(), + ) + .await? + .throttled( + base_config.max_concurrent_connections.clone(), + Some(DB_CREATE_TIMEOUT), + base_config.max_total_response_size, + base_config.max_concurrent_requests, + ); + + tracing::debug!("Completed opening libsql connection"); + + // this must happen after we create the connection maker. The connection maker holds on to a + // connection to ensure that no other connection is closing while we try to open the dump. + // that would cause a SQLITE_LOCKED error. 
+ match restore_option { + RestoreOption::Dump(_) if !is_fresh_db => { + Err(LoadDumpError::LoadDumpExistingDb)?; + } + RestoreOption::Dump(dump) => { + let conn = connection_maker.create().await?; + tracing::debug!("Loading dump"); + load_dump(dump, conn).await?; + tracing::debug!("Done loading dump"); + } + _ => { /* other cases were already handled when creating bottomless */ } + } + + join_set.spawn(run_periodic_compactions(logger.clone())); + + tracing::debug!("Done making primary connection"); + + Ok((connection_maker, wal_wrapper, stats)) +} + +pub(super) fn make_bottomless_options( + options: &Options, + namespace_db_id: NamespaceBottomlessDbId, + name: NamespaceName, +) -> Options { + let mut options = options.clone(); + let mut db_id = match namespace_db_id { + NamespaceBottomlessDbId::Namespace(id) => id, + // FIXME(marin): I don't like that, if bottomless is enabled, proper config must be passed. + NamespaceBottomlessDbId::NotProvided => options.db_id.unwrap_or_default(), + }; + + db_id = format!("ns-{db_id}:{name}"); + options.db_id = Some(db_id); + options +} + +async fn init_bottomless_replicator( + path: impl AsRef, + options: bottomless::replicator::Options, + restore_option: &RestoreOption, +) -> anyhow::Result<(bottomless::replicator::Replicator, bool)> { + tracing::debug!("Initializing bottomless replication"); + let path = path + .as_ref() + .to_str() + .ok_or_else(|| anyhow::anyhow!("Invalid db path"))? 
+ .to_owned(); + let mut replicator = bottomless::replicator::Replicator::with_options(path, options).await?; + + let (generation, timestamp) = match restore_option { + RestoreOption::Latest | RestoreOption::Dump(_) => (None, None), + RestoreOption::Generation(generation) => (Some(*generation), None), + RestoreOption::PointInTime(timestamp) => (None, Some(*timestamp)), + }; + + let (action, did_recover) = replicator.restore(generation, timestamp).await?; + match action { + bottomless::replicator::RestoreAction::SnapshotMainDbFile => { + replicator.new_generation().await; + if let Some(_handle) = replicator.snapshot_main_db_file(true).await? { + tracing::trace!("got snapshot handle after restore with generation upgrade"); + } + // Restoration process only leaves the local WAL file if it was + // detected to be newer than its remote counterpart. + replicator.maybe_replicate_wal().await? + } + bottomless::replicator::RestoreAction::ReuseGeneration(gen) => { + replicator.set_generation(gen); + } + } + + Ok((replicator, did_recover)) +} + +async fn run_periodic_compactions(logger: Arc) -> anyhow::Result<()> { + // calling `ReplicationLogger::maybe_compact()` is cheap if the compaction does not actually + // take place, so we can afford to poll it very often for simplicity + let mut interval = tokio::time::interval(tokio::time::Duration::from_millis(1000)); + interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay); + + loop { + interval.tick().await; + let handle = BLOCKING_RT.spawn_blocking(enclose! 
{(logger) move || { + logger.maybe_compact() + }}); + handle + .await + .expect("Compaction task crashed") + .context("Compaction failed")?; + } +} + +async fn load_dump(dump: S, conn: PrimaryConnection) -> crate::Result<(), LoadDumpError> +where + S: Stream> + Unpin, +{ + let mut reader = tokio::io::BufReader::new(StreamReader::new(dump)); + let mut curr = String::new(); + let mut line = String::new(); + let mut skipped_wasm_table = false; + let mut n_stmt = 0; + let mut line_id = 0; + + while let Ok(n) = reader.read_line(&mut curr).await { + line_id += 1; + if n == 0 { + break; + } + let trimmed = curr.trim(); + if trimmed.is_empty() || trimmed.starts_with("--") { + curr.clear(); + continue; + } + // FIXME: it's a well-known bug that a comment ending with a semicolon will be handled incorrectly by the current dump processing code + let statement_end = trimmed.ends_with(';'); + + // we want to concat original(non-trimmed) lines as trimming will join all them in one + // single-line statement which is incorrect if comments in the end are present + line.push_str(&curr); + curr.clear(); + + // This is a hack to ignore the libsql_wasm_func_table table because it is already created + // by the system. 
+ if !skipped_wasm_table && line.trim() == WASM_TABLE_CREATE { + skipped_wasm_table = true; + line.clear(); + continue; + } + + if statement_end { + n_stmt += 1; + // dump must be performed within a txn + if n_stmt > 2 && conn.is_autocommit().await.unwrap() { + return Err(LoadDumpError::NoTxn); + } + + line = tokio::task::spawn_blocking({ + let conn = conn.clone(); + move || -> crate::Result { + conn.with_raw(|conn| conn.execute(&line, ())).map_err(|e| { + LoadDumpError::Internal(format!("line: {}, error: {}", line_id, e)) + })?; + Ok(line) + } + }) + .await??; + line.clear(); + } else { + line.push(' '); + } + } + tracing::debug!("loaded {} lines from dump", line_id); + + if !conn.is_autocommit().await.unwrap() { + tokio::task::spawn_blocking({ + let conn = conn.clone(); + move || -> crate::Result<(), LoadDumpError> { + conn.with_raw(|conn| conn.execute("rollback", ()))?; + Ok(()) + } + }) + .await??; + return Err(LoadDumpError::NoCommit); + } + + Ok(()) +} + +fn check_fresh_db(path: &Path) -> crate::Result { + let is_fresh = !path.join("wallog").try_exists()?; + Ok(is_fresh) +} + +pub(super) async fn make_stats( + db_path: &Path, + join_set: &mut JoinSet>, + meta_store_handle: MetaStoreHandle, + stats_sender: StatsSender, + name: NamespaceName, + mut current_frame_no: watch::Receiver>, +) -> anyhow::Result> { + tracing::debug!("creating stats type"); + let stats = Stats::new(name.clone(), db_path, join_set).await?; + + // the storage monitor is optional, so we ignore the error here. 
+ tracing::debug!("stats created, sending stats"); + let _ = stats_sender + .send((name.clone(), meta_store_handle, Arc::downgrade(&stats))) + .await; + + join_set.spawn({ + let stats = stats.clone(); + // initialize the current_frame_no value + current_frame_no + .borrow_and_update() + .map(|fno| stats.set_current_frame_no(fno)); + async move { + while current_frame_no.changed().await.is_ok() { + current_frame_no + .borrow_and_update() + .map(|fno| stats.set_current_frame_no(fno)); + } + Ok(()) + } + }); + + join_set.spawn(run_storage_monitor( + db_path.into(), + Arc::downgrade(&stats), + )); + + tracing::debug!("done sending stats, and creating bg tasks"); + + Ok(stats) +} + +// Periodically check the storage used by the database and save it in the Stats structure. +// TODO: Once we have a separate fiber that does WAL checkpoints, running this routine +// right after checkpointing is exactly where it should be done. +async fn run_storage_monitor( + db_path: PathBuf, + stats: Weak, +) -> anyhow::Result<()> { + // on initialization, the database file doesn't exist yet, so we wait a bit for it to be + // created + tokio::time::sleep(Duration::from_secs(1)).await; + + let duration = tokio::time::Duration::from_secs(60); + let db_path: Arc = db_path.into(); + loop { + let db_path = db_path.clone(); + let Some(stats) = stats.upgrade() else { + return Ok(()); + }; + + let _ = tokio::task::spawn_blocking(move || { + // because closing the last connection interferes with opening a new one, we lazily + // initialize a connection here, and keep it alive for the entirety of the program. If we + // fail to open it, we wait for `duration` and try again later. 
+ match open_conn(&db_path, Sqlite3WalManager::new(), Some(rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY), None) { + Ok(mut conn) => { + if let Ok(tx) = conn.transaction() { + let page_count = tx.query_row("pragma page_count;", [], |row| { row.get::(0) }); + let freelist_count = tx.query_row("pragma freelist_count;", [], |row| { row.get::(0) }); + if let (Ok(page_count), Ok(freelist_count)) = (page_count, freelist_count) { + let storage_bytes_used = (page_count - freelist_count) * 4096; + stats.set_storage_bytes_used(storage_bytes_used); + } + } + }, + Err(e) => { + tracing::warn!("failed to open connection for storager monitor: {e}, trying again in {duration:?}"); + }, + } + }).await; + + tokio::time::sleep(duration).await; + } +} + +pub(super) async fn cleanup_primary( + base: &BaseNamespaceConfig, + primary_config: &PrimaryExtraConfig, + namespace: &NamespaceName, + db_config: &DatabaseConfig, + prune_all: bool, + bottomless_db_id_init: NamespaceBottomlessDbIdInit, +) -> crate::Result<()> { + let ns_path = base.base_path.join("dbs").join(namespace.as_str()); + if let Some(ref options) = primary_config.bottomless_replication { + let bottomless_db_id = match bottomless_db_id_init { + NamespaceBottomlessDbIdInit::Provided(db_id) => db_id, + NamespaceBottomlessDbIdInit::FetchFromConfig => { + NamespaceBottomlessDbId::from_config(db_config) + } + }; + let options = make_bottomless_options(options, bottomless_db_id, namespace.clone()); + let replicator = bottomless::replicator::Replicator::with_options( + ns_path.join("data").to_str().unwrap(), + options, + ) + .await?; + if prune_all { + let delete_all = replicator.delete_all(None).await?; + // perform hard deletion in the background + tokio::spawn(delete_all.commit()); + } else { + // for soft delete make sure that local db is fully backed up + replicator.savepoint().confirmed().await?; + } + } + + if ns_path.try_exists()? 
{ + tracing::debug!("removing database directory: {}", ns_path.display()); + tokio::fs::remove_dir_all(ns_path).await?; + } + + Ok(()) +} diff --git a/libsql-server/src/namespace/configurator/mod.rs b/libsql-server/src/namespace/configurator/mod.rs index a240c3e410..e5db335ff6 100644 --- a/libsql-server/src/namespace/configurator/mod.rs +++ b/libsql-server/src/namespace/configurator/mod.rs @@ -1,22 +1,51 @@ +use std::path::{Path, PathBuf}; use std::pin::Pin; +use std::sync::Arc; +use std::time::Duration; +use chrono::NaiveDateTime; use futures::Future; +use tokio::sync::Semaphore; + +use crate::connection::config::DatabaseConfig; +use crate::replication::script_backup_manager::ScriptBackupManager; +use crate::StatsSender; use super::broadcasters::BroadcasterHandle; use super::meta_store::MetaStoreHandle; -use super::{ - NamespaceConfig, NamespaceName, NamespaceStore, ResetCb, ResolveNamespacePathFn, RestoreOption, -}; +use super::{Namespace, NamespaceBottomlessDbIdInit, NamespaceName, NamespaceStore, ResetCb, ResolveNamespacePathFn, RestoreOption}; +mod helpers; mod primary; mod replica; mod schema; +pub mod fork; pub use primary::PrimaryConfigurator; pub use replica::ReplicaConfigurator; pub use schema::SchemaConfigurator; -type DynConfigurator = dyn ConfigureNamespace + Send + Sync + 'static; +#[derive(Clone, Debug)] +pub struct BaseNamespaceConfig { + pub(crate) base_path: Arc, + pub(crate) extensions: Arc<[PathBuf]>, + pub(crate) stats_sender: StatsSender, + pub(crate) max_response_size: u64, + pub(crate) max_total_response_size: u64, + pub(crate) max_concurrent_connections: Arc, + pub(crate) max_concurrent_requests: u64, +} + +#[derive(Clone)] +pub struct PrimaryExtraConfig { + pub(crate) max_log_size: u64, + pub(crate) max_log_duration: Option, + pub(crate) bottomless_replication: Option, + pub(crate) scripted_backup: Option, + pub(crate) checkpoint_interval: Option, +} + +pub type DynConfigurator = dyn ConfigureNamespace + Send + Sync + 'static; pub(crate) 
struct NamespaceConfigurators { replica_configurator: Option>, @@ -27,9 +56,6 @@ pub(crate) struct NamespaceConfigurators { impl Default for NamespaceConfigurators { fn default() -> Self { Self::empty() - .with_primary(PrimaryConfigurator) - .with_replica(ReplicaConfigurator) - .with_schema(SchemaConfigurator) } } @@ -42,17 +68,17 @@ impl NamespaceConfigurators { } } - pub fn with_primary(mut self, c: impl ConfigureNamespace + Send + Sync + 'static) -> Self { + pub fn with_primary(&mut self, c: impl ConfigureNamespace + Send + Sync + 'static) -> &mut Self { self.primary_configurator = Some(Box::new(c)); self } - pub fn with_replica(mut self, c: impl ConfigureNamespace + Send + Sync + 'static) -> Self { + pub fn with_replica(&mut self, c: impl ConfigureNamespace + Send + Sync + 'static) -> &mut Self { self.replica_configurator = Some(Box::new(c)); self } - pub fn with_schema(mut self, c: impl ConfigureNamespace + Send + Sync + 'static) -> Self { + pub fn with_schema(&mut self, c: impl ConfigureNamespace + Send + Sync + 'static) -> &mut Self { self.schema_configurator = Some(Box::new(c)); self } @@ -73,7 +99,6 @@ impl NamespaceConfigurators { pub trait ConfigureNamespace { fn setup<'a>( &'a self, - ns_config: &'a NamespaceConfig, db_config: MetaStoreHandle, restore_option: RestoreOption, name: &'a NamespaceName, @@ -81,5 +106,23 @@ pub trait ConfigureNamespace { resolve_attach_path: ResolveNamespacePathFn, store: NamespaceStore, broadcaster: BroadcasterHandle, - ) -> Pin> + Send + 'a>>; + ) -> Pin> + Send + 'a>>; + + fn cleanup<'a>( + &'a self, + namespace: &'a NamespaceName, + db_config: &'a DatabaseConfig, + prune_all: bool, + bottomless_db_id_init: NamespaceBottomlessDbIdInit, + ) -> Pin> + Send + 'a>>; + + fn fork<'a>( + &'a self, + from_ns: &'a Namespace, + from_config: MetaStoreHandle, + to_ns: NamespaceName, + to_config: MetaStoreHandle, + timestamp: Option, + store: NamespaceStore, + ) -> Pin> + Send + 'a>>; } diff --git 
a/libsql-server/src/namespace/configurator/primary.rs b/libsql-server/src/namespace/configurator/primary.rs index f28d288a97..4351f6a3ac 100644 --- a/libsql-server/src/namespace/configurator/primary.rs +++ b/libsql-server/src/namespace/configurator/primary.rs @@ -4,22 +4,117 @@ use std::{path::Path, pin::Pin, sync::Arc}; use futures::prelude::Future; use tokio::task::JoinSet; +use crate::connection::config::DatabaseConfig; +use crate::connection::connection_manager::InnerWalManager; use crate::connection::MakeConnection; use crate::database::{Database, PrimaryDatabase}; -use crate::namespace::{Namespace, NamespaceConfig, NamespaceName, NamespaceStore, ResetCb, ResolveNamespacePathFn, RestoreOption}; -use crate::namespace::meta_store::MetaStoreHandle; use crate::namespace::broadcasters::BroadcasterHandle; +use crate::namespace::configurator::helpers::make_primary_connection_maker; +use crate::namespace::meta_store::MetaStoreHandle; +use crate::namespace::{ + Namespace, NamespaceBottomlessDbIdInit, NamespaceName, NamespaceStore, + ResetCb, ResolveNamespacePathFn, RestoreOption, +}; use crate::run_periodic_checkpoint; use crate::schema::{has_pending_migration_task, setup_migration_table}; -use super::ConfigureNamespace; +use super::helpers::cleanup_primary; +use super::{BaseNamespaceConfig, ConfigureNamespace, PrimaryExtraConfig}; + +pub struct PrimaryConfigurator { + base: BaseNamespaceConfig, + primary_config: PrimaryExtraConfig, + make_wal_manager: Arc InnerWalManager + Sync + Send + 'static>, +} + +impl PrimaryConfigurator { + pub fn new( + base: BaseNamespaceConfig, + primary_config: PrimaryExtraConfig, + make_wal_manager: Arc InnerWalManager + Sync + Send + 'static>, + ) -> Self { + Self { + base, + primary_config, + make_wal_manager, + } + } + + #[tracing::instrument(skip_all, fields(namespace))] + async fn try_new_primary( + &self, + namespace: NamespaceName, + meta_store_handle: MetaStoreHandle, + restore_option: RestoreOption, + resolve_attach_path: 
ResolveNamespacePathFn, + db_path: Arc, + broadcaster: BroadcasterHandle, + ) -> crate::Result { + let mut join_set = JoinSet::new(); + + tokio::fs::create_dir_all(&db_path).await?; + + let block_writes = Arc::new(AtomicBool::new(false)); + let (connection_maker, wal_wrapper, stats) = make_primary_connection_maker( + &self.primary_config, + &self.base, + &meta_store_handle, + &db_path, + &namespace, + restore_option, + block_writes.clone(), + &mut join_set, + resolve_attach_path, + broadcaster, + self.make_wal_manager.clone(), + ) + .await?; + let connection_maker = Arc::new(connection_maker); + + if meta_store_handle.get().shared_schema_name.is_some() { + let block_writes = block_writes.clone(); + let conn = connection_maker.create().await?; + tokio::task::spawn_blocking(move || { + conn.with_raw(|conn| -> crate::Result<()> { + setup_migration_table(conn)?; + if has_pending_migration_task(conn)? { + block_writes.store(true, Ordering::SeqCst); + } + Ok(()) + }) + }) + .await + .unwrap()?; + } + + if let Some(checkpoint_interval) = self.primary_config.checkpoint_interval { + join_set.spawn(run_periodic_checkpoint( + connection_maker.clone(), + checkpoint_interval, + namespace.clone(), + )); + } + + tracing::debug!("Done making new primary"); -pub struct PrimaryConfigurator; + Ok(Namespace { + tasks: join_set, + db: Database::Primary(PrimaryDatabase { + wal_wrapper, + connection_maker, + block_writes, + }), + name: namespace, + stats, + db_config_store: meta_store_handle, + path: db_path.into(), + }) + } +} impl ConfigureNamespace for PrimaryConfigurator { fn setup<'a>( &'a self, - config: &'a NamespaceConfig, meta_store_handle: MetaStoreHandle, restore_option: RestoreOption, name: &'a NamespaceName, @@ -27,102 +122,74 @@ impl ConfigureNamespace for PrimaryConfigurator { resolve_attach_path: ResolveNamespacePathFn, _store: NamespaceStore, broadcaster: BroadcasterHandle, - ) -> Pin> + Send + 'a>> - { + ) -> Pin> + Send + 'a>> { Box::pin(async move { - let db_path: Arc 
= config.base_path.join("dbs").join(name.as_str()).into(); + let db_path: Arc = self.base.base_path.join("dbs").join(name.as_str()).into(); let fresh_namespace = !db_path.try_exists()?; // FIXME: make that truly atomic. explore the idea of using temp directories, and it's implications - match try_new_primary( - config, - name.clone(), - meta_store_handle, - restore_option, - resolve_attach_path, - db_path.clone(), - broadcaster, - ) + match self + .try_new_primary( + name.clone(), + meta_store_handle, + restore_option, + resolve_attach_path, + db_path.clone(), + broadcaster, + ) .await - { - Ok(this) => Ok(this), - Err(e) if fresh_namespace => { - tracing::error!("an error occured while deleting creating namespace, cleaning..."); - if let Err(e) = tokio::fs::remove_dir_all(&db_path).await { - tracing::error!("failed to remove dirty namespace directory: {e}") - } - Err(e) + { + Ok(this) => Ok(this), + Err(e) if fresh_namespace => { + tracing::error!( + "an error occured while deleting creating namespace, cleaning..." 
+ ); + if let Err(e) = tokio::fs::remove_dir_all(&db_path).await { + tracing::error!("failed to remove dirty namespace directory: {e}") } - Err(e) => Err(e), + Err(e) } + Err(e) => Err(e), + } }) } -} -#[tracing::instrument(skip_all, fields(namespace))] -async fn try_new_primary( - ns_config: &NamespaceConfig, - namespace: NamespaceName, - meta_store_handle: MetaStoreHandle, - restore_option: RestoreOption, - resolve_attach_path: ResolveNamespacePathFn, - db_path: Arc, - broadcaster: BroadcasterHandle, -) -> crate::Result { - let mut join_set = JoinSet::new(); - - tokio::fs::create_dir_all(&db_path).await?; - - let block_writes = Arc::new(AtomicBool::new(false)); - let (connection_maker, wal_wrapper, stats) = Namespace::make_primary_connection_maker( - ns_config, - &meta_store_handle, - &db_path, - &namespace, - restore_option, - block_writes.clone(), - &mut join_set, - resolve_attach_path, - broadcaster, - ) - .await?; - let connection_maker = Arc::new(connection_maker); - - if meta_store_handle.get().shared_schema_name.is_some() { - let block_writes = block_writes.clone(); - let conn = connection_maker.create().await?; - tokio::task::spawn_blocking(move || { - conn.with_raw(|conn| -> crate::Result<()> { - setup_migration_table(conn)?; - if has_pending_migration_task(conn)? 
{ - block_writes.store(true, Ordering::SeqCst); - } - Ok(()) - }) + fn cleanup<'a>( + &'a self, + namespace: &'a NamespaceName, + db_config: &'a DatabaseConfig, + prune_all: bool, + bottomless_db_id_init: NamespaceBottomlessDbIdInit, + ) -> Pin> + Send + 'a>> { + Box::pin(async move { + cleanup_primary( + &self.base, + &self.primary_config, + namespace, + db_config, + prune_all, + bottomless_db_id_init, + ).await }) - .await - .unwrap()?; - } - - if let Some(checkpoint_interval) = ns_config.checkpoint_interval { - join_set.spawn(run_periodic_checkpoint( - connection_maker.clone(), - checkpoint_interval, - namespace.clone(), - )); } - tracing::debug!("Done making new primary"); - - Ok(Namespace { - tasks: join_set, - db: Database::Primary(PrimaryDatabase { - wal_wrapper, - connection_maker, - block_writes, - }), - name: namespace, - stats, - db_config_store: meta_store_handle, - path: db_path.into(), - }) + fn fork<'a>( + &'a self, + from_ns: &'a Namespace, + from_config: MetaStoreHandle, + to_ns: NamespaceName, + to_config: MetaStoreHandle, + timestamp: Option, + store: NamespaceStore, + ) -> Pin> + Send + 'a>> { + Box::pin(super::fork::fork( + from_ns, + from_config, + to_ns, + to_config, + timestamp, + store, + &self.primary_config, + self.base.base_path.clone())) + } } + diff --git a/libsql-server/src/namespace/configurator/replica.rs b/libsql-server/src/namespace/configurator/replica.rs index 4d3ca1dadf..61dd48b0bf 100644 --- a/libsql-server/src/namespace/configurator/replica.rs +++ b/libsql-server/src/namespace/configurator/replica.rs @@ -2,29 +2,51 @@ use std::pin::Pin; use std::sync::Arc; use futures::Future; +use hyper::Uri; use libsql_replication::rpc::replication::replication_log_client::ReplicationLogClient; use tokio::task::JoinSet; +use tonic::transport::Channel; +use crate::connection::config::DatabaseConfig; +use crate::connection::connection_manager::InnerWalManager; use crate::connection::write_proxy::MakeWriteProxyConn; use 
crate::connection::MakeConnection; use crate::database::{Database, ReplicaDatabase}; use crate::namespace::broadcasters::BroadcasterHandle; +use crate::namespace::configurator::helpers::make_stats; use crate::namespace::meta_store::MetaStoreHandle; -use crate::namespace::{Namespace, RestoreOption}; -use crate::namespace::{ - make_stats, NamespaceConfig, NamespaceName, NamespaceStore, ResetCb, ResetOp, - ResolveNamespacePathFn, -}; +use crate::namespace::{Namespace, NamespaceBottomlessDbIdInit, RestoreOption}; +use crate::namespace::{NamespaceName, NamespaceStore, ResetCb, ResetOp, ResolveNamespacePathFn}; use crate::{DB_CREATE_TIMEOUT, DEFAULT_AUTO_CHECKPOINT}; -use super::ConfigureNamespace; +use super::{BaseNamespaceConfig, ConfigureNamespace}; -pub struct ReplicaConfigurator; +pub struct ReplicaConfigurator { + base: BaseNamespaceConfig, + channel: Channel, + uri: Uri, + make_wal_manager: Arc InnerWalManager + Sync + Send + 'static>, +} + +impl ReplicaConfigurator { + pub fn new( + base: BaseNamespaceConfig, + channel: Channel, + uri: Uri, + make_wal_manager: Arc InnerWalManager + Sync + Send + 'static>, + ) -> Self { + Self { + base, + channel, + uri, + make_wal_manager, + } + } +} impl ConfigureNamespace for ReplicaConfigurator { fn setup<'a>( &'a self, - config: &'a NamespaceConfig, meta_store_handle: MetaStoreHandle, restore_option: RestoreOption, name: &'a NamespaceName, @@ -32,13 +54,12 @@ impl ConfigureNamespace for ReplicaConfigurator { resolve_attach_path: ResolveNamespacePathFn, store: NamespaceStore, broadcaster: BroadcasterHandle, - ) -> Pin> + Send + 'a>> - { + ) -> Pin> + Send + 'a>> { Box::pin(async move { tracing::debug!("creating replica namespace"); - let db_path = config.base_path.join("dbs").join(name.as_str()); - let channel = config.channel.clone().expect("bad replica config"); - let uri = config.uri.clone().expect("bad replica config"); + let db_path = self.base.base_path.join("dbs").join(name.as_str()); + let channel = 
self.channel.clone(); + let uri = self.uri.clone(); let rpc_client = ReplicationLogClient::with_origin(channel.clone(), uri.clone()); let client = crate::replication::replicator_client::Client::new( @@ -48,45 +69,46 @@ impl ConfigureNamespace for ReplicaConfigurator { meta_store_handle.clone(), store.clone(), ) - .await?; + .await?; let applied_frame_no_receiver = client.current_frame_no_notifier.subscribe(); let mut replicator = libsql_replication::replicator::Replicator::new( client, db_path.join("data"), DEFAULT_AUTO_CHECKPOINT, - config.encryption_config.clone(), + None, ) - .await?; + .await?; tracing::debug!("try perform handshake"); // force a handshake now, to retrieve the primary's current replication index match replicator.try_perform_handshake().await { Err(libsql_replication::replicator::Error::Meta( - libsql_replication::meta::Error::LogIncompatible, + libsql_replication::meta::Error::LogIncompatible, )) => { tracing::error!( "trying to replicate incompatible logs, reseting replica and nuking db dir" ); std::fs::remove_dir_all(&db_path).unwrap(); - return self.setup( - config, - meta_store_handle, - restore_option, - name, - reset, - resolve_attach_path, - store, - broadcaster, - ) + return self + .setup( + meta_store_handle, + restore_option, + name, + reset, + resolve_attach_path, + store, + broadcaster, + ) .await; - } + } Err(e) => Err(e)?, Ok(_) => (), } tracing::debug!("done performing handshake"); - let primary_current_replicatio_index = replicator.client_mut().primary_replication_index; + let primary_current_replicatio_index = + replicator.client_mut().primary_replication_index; let mut join_set = JoinSet::new(); let namespace = name.clone(); @@ -144,36 +166,35 @@ impl ConfigureNamespace for ReplicaConfigurator { &db_path, &mut join_set, meta_store_handle.clone(), - config.stats_sender.clone(), + self.base.stats_sender.clone(), name.clone(), applied_frame_no_receiver.clone(), - config.encryption_config.clone(), ) - .await?; + .await?; let 
connection_maker = MakeWriteProxyConn::new( db_path.clone(), - config.extensions.clone(), + self.base.extensions.clone(), channel.clone(), uri.clone(), stats.clone(), broadcaster, meta_store_handle.clone(), applied_frame_no_receiver, - config.max_response_size, - config.max_total_response_size, + self.base.max_response_size, + self.base.max_total_response_size, primary_current_replicatio_index, - config.encryption_config.clone(), + None, resolve_attach_path, - config.make_wal_manager.clone(), + self.make_wal_manager.clone(), ) - .await? - .throttled( - config.max_concurrent_connections.clone(), - Some(DB_CREATE_TIMEOUT), - config.max_total_response_size, - config.max_concurrent_requests, - ); + .await? + .throttled( + self.base.max_concurrent_connections.clone(), + Some(DB_CREATE_TIMEOUT), + self.base.max_total_response_size, + self.base.max_concurrent_requests, + ); Ok(Namespace { tasks: join_set, @@ -187,4 +208,35 @@ impl ConfigureNamespace for ReplicaConfigurator { }) }) } + + fn cleanup<'a>( + &'a self, + namespace: &'a NamespaceName, + _db_config: &DatabaseConfig, + _prune_all: bool, + _bottomless_db_id_init: NamespaceBottomlessDbIdInit, + ) -> Pin> + Send + 'a>> { + Box::pin(async move { + let ns_path = self.base.base_path.join("dbs").join(namespace.as_str()); + if ns_path.try_exists()? 
{ + tracing::debug!("removing database directory: {}", ns_path.display()); + tokio::fs::remove_dir_all(ns_path).await?; + } + Ok(()) + }) + } + + fn fork<'a>( + &'a self, + _from_ns: &'a Namespace, + _from_config: MetaStoreHandle, + _to_ns: NamespaceName, + _to_config: MetaStoreHandle, + _timestamp: Option, + _store: NamespaceStore, + ) -> Pin> + Send + 'a>> { + Box::pin(std::future::ready(Err(crate::Error::Fork( + super::fork::ForkError::ForkReplica, + )))) + } } diff --git a/libsql-server/src/namespace/configurator/schema.rs b/libsql-server/src/namespace/configurator/schema.rs index 864b75239f..e55c706fec 100644 --- a/libsql-server/src/namespace/configurator/schema.rs +++ b/libsql-server/src/namespace/configurator/schema.rs @@ -3,22 +3,36 @@ use std::sync::{atomic::AtomicBool, Arc}; use futures::prelude::Future; use tokio::task::JoinSet; +use crate::connection::config::DatabaseConfig; +use crate::connection::connection_manager::InnerWalManager; use crate::database::{Database, SchemaDatabase}; use crate::namespace::meta_store::MetaStoreHandle; use crate::namespace::{ - Namespace, NamespaceConfig, NamespaceName, NamespaceStore, + Namespace, NamespaceName, NamespaceStore, ResetCb, ResolveNamespacePathFn, RestoreOption, }; use crate::namespace::broadcasters::BroadcasterHandle; +use crate::schema::SchedulerHandle; -use super::ConfigureNamespace; +use super::helpers::{cleanup_primary, make_primary_connection_maker}; +use super::{BaseNamespaceConfig, ConfigureNamespace, PrimaryExtraConfig}; -pub struct SchemaConfigurator; +pub struct SchemaConfigurator { + base: BaseNamespaceConfig, + primary_config: PrimaryExtraConfig, + make_wal_manager: Arc InnerWalManager + Sync + Send + 'static>, + migration_scheduler: SchedulerHandle, +} + +impl SchemaConfigurator { + pub fn new(base: BaseNamespaceConfig, primary_config: PrimaryExtraConfig, make_wal_manager: Arc InnerWalManager + Sync + Send + 'static>, migration_scheduler: SchedulerHandle) -> Self { + Self { base, primary_config, 
make_wal_manager, migration_scheduler } + } +} impl ConfigureNamespace for SchemaConfigurator { fn setup<'a>( &'a self, - ns_config: &'a NamespaceConfig, db_config: MetaStoreHandle, restore_option: RestoreOption, name: &'a NamespaceName, @@ -29,12 +43,13 @@ impl ConfigureNamespace for SchemaConfigurator { ) -> std::pin::Pin> + Send + 'a>> { Box::pin(async move { let mut join_set = JoinSet::new(); - let db_path = ns_config.base_path.join("dbs").join(name.as_str()); + let db_path = self.base.base_path.join("dbs").join(name.as_str()); tokio::fs::create_dir_all(&db_path).await?; - let (connection_maker, wal_manager, stats) = Namespace::make_primary_connection_maker( - ns_config, + let (connection_maker, wal_manager, stats) = make_primary_connection_maker( + &self.primary_config, + &self.base, &db_config, &db_path, &name, @@ -43,12 +58,13 @@ impl ConfigureNamespace for SchemaConfigurator { &mut join_set, resolve_attach_path, broadcaster, + self.make_wal_manager.clone() ) .await?; Ok(Namespace { db: Database::Schema(SchemaDatabase::new( - ns_config.migration_scheduler.clone(), + self.migration_scheduler.clone(), name.clone(), connection_maker, wal_manager, @@ -62,4 +78,43 @@ impl ConfigureNamespace for SchemaConfigurator { }) }) } + + fn cleanup<'a>( + &'a self, + namespace: &'a NamespaceName, + db_config: &'a DatabaseConfig, + prune_all: bool, + bottomless_db_id_init: crate::namespace::NamespaceBottomlessDbIdInit, + ) -> std::pin::Pin> + Send + 'a>> { + Box::pin(async move { + cleanup_primary( + &self.base, + &self.primary_config, + namespace, + db_config, + prune_all, + bottomless_db_id_init, + ).await + }) + } + + fn fork<'a>( + &'a self, + from_ns: &'a Namespace, + from_config: MetaStoreHandle, + to_ns: NamespaceName, + to_config: MetaStoreHandle, + timestamp: Option, + store: NamespaceStore, + ) -> std::pin::Pin> + Send + 'a>> { + Box::pin(super::fork::fork( + from_ns, + from_config, + to_ns, + to_config, + timestamp, + store, + &self.primary_config, + 
self.base.base_path.clone())) + } } diff --git a/libsql-server/src/namespace/mod.rs b/libsql-server/src/namespace/mod.rs index 5ccda74c54..7cfa6b351c 100644 --- a/libsql-server/src/namespace/mod.rs +++ b/libsql-server/src/namespace/mod.rs @@ -1,52 +1,24 @@ -use std::path::{Path, PathBuf}; -use std::sync::atomic::AtomicBool; -use std::sync::{Arc, Weak}; +use std::path::Path; +use std::sync::Arc; -use anyhow::{Context as _, Error}; -use bottomless::replicator::Options; -use broadcasters::BroadcasterHandle; +use anyhow::Context as _; use bytes::Bytes; use chrono::NaiveDateTime; -use enclose::enclose; use futures_core::{Future, Stream}; -use hyper::Uri; -use libsql_sys::wal::Sqlite3WalManager; -use libsql_sys::EncryptionConfig; -use tokio::io::AsyncBufReadExt; -use tokio::sync::{watch, Semaphore}; use tokio::task::JoinSet; -use tokio::time::Duration; -use tokio_util::io::StreamReader; -use tonic::transport::Channel; use uuid::Uuid; use crate::auth::parse_jwt_keys; use crate::connection::config::DatabaseConfig; -use crate::connection::connection_manager::InnerWalManager; -use crate::connection::libsql::{open_conn, MakeLibSqlConn}; -use crate::connection::{Connection as _, MakeConnection}; -use crate::database::{ - Database, DatabaseKind, PrimaryConnection, PrimaryConnectionMaker, -}; -use crate::error::LoadDumpError; -use crate::replication::script_backup_manager::ScriptBackupManager; -use crate::replication::{FrameNo, ReplicationLogger}; -use crate::schema::SchedulerHandle; +use crate::connection::Connection as _; +use crate::database::Database; use crate::stats::Stats; -use crate::{ - StatsSender, BLOCKING_RT, DB_CREATE_TIMEOUT, DEFAULT_AUTO_CHECKPOINT, -}; -pub use fork::ForkError; - -use self::fork::{ForkTask, PointInTimeRestore}; use self::meta_store::MetaStoreHandle; pub use self::name::NamespaceName; -use self::replication_wal::{make_replication_wal_wrapper, ReplicationWalWrapper}; pub use self::store::NamespaceStore; pub mod broadcasters; -mod fork; pub mod 
meta_store; mod name; pub mod replication_wal; @@ -101,51 +73,6 @@ impl Namespace { &self.name } - /// completely remove resources associated with the namespace - pub(crate) async fn cleanup( - ns_config: &NamespaceConfig, - name: &NamespaceName, - db_config: &DatabaseConfig, - prune_all: bool, - bottomless_db_id_init: NamespaceBottomlessDbIdInit, - ) -> crate::Result<()> { - let ns_path = ns_config.base_path.join("dbs").join(name.as_str()); - match ns_config.db_kind { - DatabaseKind::Primary => { - if let Some(ref options) = ns_config.bottomless_replication { - let bottomless_db_id = match bottomless_db_id_init { - NamespaceBottomlessDbIdInit::Provided(db_id) => db_id, - NamespaceBottomlessDbIdInit::FetchFromConfig => { - NamespaceBottomlessDbId::from_config(&db_config) - } - }; - let options = make_bottomless_options(options, bottomless_db_id, name.clone()); - let replicator = bottomless::replicator::Replicator::with_options( - ns_path.join("data").to_str().unwrap(), - options, - ) - .await?; - if prune_all { - let delete_all = replicator.delete_all(None).await?; - // perform hard deletion in the background - tokio::spawn(delete_all.commit()); - } else { - // for soft delete make sure that local db is fully backed up - replicator.savepoint().confirmed().await?; - } - } - } - DatabaseKind::Replica => (), - } - - if ns_path.try_exists()? 
{ - tracing::debug!("removing database directory: {}", ns_path.display()); - tokio::fs::remove_dir_all(ns_path).await?; - } - - Ok(()) - } - async fn destroy(mut self) -> anyhow::Result<()> { self.tasks.shutdown().await; self.db.destroy(); @@ -195,293 +122,11 @@ impl Namespace { pub fn config_changed(&self) -> impl Future { self.db_config_store.changed() } - - #[tracing::instrument(skip_all)] - async fn make_primary_connection_maker( - ns_config: &NamespaceConfig, - meta_store_handle: &MetaStoreHandle, - db_path: &Path, - name: &NamespaceName, - restore_option: RestoreOption, - block_writes: Arc, - join_set: &mut JoinSet>, - resolve_attach_path: ResolveNamespacePathFn, - broadcaster: BroadcasterHandle, - ) -> crate::Result<(PrimaryConnectionMaker, ReplicationWalWrapper, Arc)> { - let db_config = meta_store_handle.get(); - let bottomless_db_id = NamespaceBottomlessDbId::from_config(&db_config); - // FIXME: figure how to to it per-db - let mut is_dirty = { - let sentinel_path = db_path.join(".sentinel"); - if sentinel_path.try_exists()? { - true - } else { - tokio::fs::File::create(&sentinel_path).await?; - false - } - }; - - // FIXME: due to a bug in logger::checkpoint_db we call regular checkpointing code - // instead of our virtual WAL one. It's a bit tangled to fix right now, because - // we need WAL context for checkpointing, and WAL context needs the ReplicationLogger... - // So instead we checkpoint early, *before* bottomless gets initialized. That way - // we're sure bottomless won't try to back up any existing WAL frames and will instead - // treat the existing db file as the source of truth. 
- - let bottomless_replicator = match ns_config.bottomless_replication { - Some(ref options) => { - tracing::debug!("Checkpointing before initializing bottomless"); - crate::replication::primary::logger::checkpoint_db(&db_path.join("data"))?; - tracing::debug!("Checkpointed before initializing bottomless"); - let options = make_bottomless_options(options, bottomless_db_id, name.clone()); - let (replicator, did_recover) = - init_bottomless_replicator(db_path.join("data"), options, &restore_option) - .await?; - tracing::debug!("Completed init of bottomless replicator"); - is_dirty |= did_recover; - Some(replicator) - } - None => None, - }; - - tracing::debug!("Checking fresh db"); - let is_fresh_db = check_fresh_db(&db_path)?; - // switch frame-count checkpoint to time-based one - let auto_checkpoint = if ns_config.checkpoint_interval.is_some() { - 0 - } else { - DEFAULT_AUTO_CHECKPOINT - }; - - let logger = Arc::new(ReplicationLogger::open( - &db_path, - ns_config.max_log_size, - ns_config.max_log_duration, - is_dirty, - auto_checkpoint, - ns_config.scripted_backup.clone(), - name.clone(), - ns_config.encryption_config.clone(), - )?); - - tracing::debug!("sending stats"); - - let stats = make_stats( - &db_path, - join_set, - meta_store_handle.clone(), - ns_config.stats_sender.clone(), - name.clone(), - logger.new_frame_notifier.subscribe(), - ns_config.encryption_config.clone(), - ) - .await?; - - tracing::debug!("Making replication wal wrapper"); - let wal_wrapper = make_replication_wal_wrapper(bottomless_replicator, logger.clone()); - - tracing::debug!("Opening libsql connection"); - - let connection_maker = MakeLibSqlConn::new( - db_path.to_path_buf(), - wal_wrapper.clone(), - stats.clone(), - broadcaster, - meta_store_handle.clone(), - ns_config.extensions.clone(), - ns_config.max_response_size, - ns_config.max_total_response_size, - auto_checkpoint, - logger.new_frame_notifier.subscribe(), - ns_config.encryption_config.clone(), - block_writes, - 
resolve_attach_path, - ns_config.make_wal_manager.clone(), - ) - .await? - .throttled( - ns_config.max_concurrent_connections.clone(), - Some(DB_CREATE_TIMEOUT), - ns_config.max_total_response_size, - ns_config.max_concurrent_requests, - ); - - tracing::debug!("Completed opening libsql connection"); - - // this must happen after we create the connection maker. The connection maker old on a - // connection to ensure that no other connection is closing while we try to open the dump. - // that would cause a SQLITE_LOCKED error. - match restore_option { - RestoreOption::Dump(_) if !is_fresh_db => { - Err(LoadDumpError::LoadDumpExistingDb)?; - } - RestoreOption::Dump(dump) => { - let conn = connection_maker.create().await?; - tracing::debug!("Loading dump"); - load_dump(dump, conn).await?; - tracing::debug!("Done loading dump"); - } - _ => { /* other cases were already handled when creating bottomless */ } - } - - join_set.spawn(run_periodic_compactions(logger.clone())); - - tracing::debug!("Done making primary connection"); - - Ok((connection_maker, wal_wrapper, stats)) - } - - async fn fork( - ns_config: &NamespaceConfig, - from_ns: &Namespace, - from_config: MetaStoreHandle, - to_ns: NamespaceName, - to_config: MetaStoreHandle, - timestamp: Option, - store: NamespaceStore, - ) -> crate::Result { - let from_config = from_config.get(); - match ns_config.db_kind { - DatabaseKind::Primary => { - let bottomless_db_id = NamespaceBottomlessDbId::from_config(&from_config); - let restore_to = if let Some(timestamp) = timestamp { - if let Some(ref options) = ns_config.bottomless_replication { - Some(PointInTimeRestore { - timestamp, - replicator_options: make_bottomless_options( - options, - bottomless_db_id.clone(), - from_ns.name().clone(), - ), - }) - } else { - return Err(crate::Error::Fork(ForkError::BackupServiceNotConfigured)); - } - } else { - None - }; - - let logger = match &from_ns.db { - Database::Primary(db) => db.wal_wrapper.wrapper().logger(), - 
Database::Schema(db) => db.wal_wrapper.wrapper().logger(), - _ => { - return Err(crate::Error::Fork(ForkError::Internal(Error::msg( - "Invalid source database type for fork", - )))); - } - }; - - let fork_task = ForkTask { - base_path: ns_config.base_path.clone(), - to_namespace: to_ns.clone(), - logger, - restore_to, - to_config, - store, - }; - - let ns = fork_task.fork().await?; - Ok(ns) - } - DatabaseKind::Replica => Err(ForkError::ForkReplica.into()), - } - } -} - -pub struct NamespaceConfig { - /// Default database kind the store should be Creating - pub(crate) db_kind: DatabaseKind, - // Common config - pub(crate) base_path: Arc, - pub(crate) max_log_size: u64, - pub(crate) max_log_duration: Option, - pub(crate) extensions: Arc<[PathBuf]>, - pub(crate) stats_sender: StatsSender, - pub(crate) max_response_size: u64, - pub(crate) max_total_response_size: u64, - pub(crate) checkpoint_interval: Option, - pub(crate) max_concurrent_connections: Arc, - pub(crate) max_concurrent_requests: u64, - pub(crate) encryption_config: Option, - - // Replica specific config - /// grpc channel for replica - pub channel: Option, - /// grpc uri - pub uri: Option, - - // primary only config - pub(crate) bottomless_replication: Option, - pub(crate) scripted_backup: Option, - pub(crate) migration_scheduler: SchedulerHandle, - pub(crate) make_wal_manager: Arc InnerWalManager + Sync + Send + 'static>, } pub type DumpStream = Box> + Send + Sync + 'static + Unpin>; -fn make_bottomless_options( - options: &Options, - namespace_db_id: NamespaceBottomlessDbId, - name: NamespaceName, -) -> Options { - let mut options = options.clone(); - let mut db_id = match namespace_db_id { - NamespaceBottomlessDbId::Namespace(id) => id, - // FIXME(marin): I don't like that, if bottomless is enabled, proper config must be passed. 
- NamespaceBottomlessDbId::NotProvided => options.db_id.unwrap_or_default(), - }; - - db_id = format!("ns-{db_id}:{name}"); - options.db_id = Some(db_id); - options -} - -async fn make_stats( - db_path: &Path, - join_set: &mut JoinSet>, - meta_store_handle: MetaStoreHandle, - stats_sender: StatsSender, - name: NamespaceName, - mut current_frame_no: watch::Receiver>, - encryption_config: Option, -) -> anyhow::Result> { - tracing::debug!("creating stats type"); - let stats = Stats::new(name.clone(), db_path, join_set).await?; - - // the storage monitor is optional, so we ignore the error here. - tracing::debug!("stats created, sending stats"); - let _ = stats_sender - .send((name.clone(), meta_store_handle, Arc::downgrade(&stats))) - .await; - - join_set.spawn({ - let stats = stats.clone(); - // initialize the current_frame_no value - current_frame_no - .borrow_and_update() - .map(|fno| stats.set_current_frame_no(fno)); - async move { - while current_frame_no.changed().await.is_ok() { - current_frame_no - .borrow_and_update() - .map(|fno| stats.set_current_frame_no(fno)); - } - Ok(()) - } - }); - - join_set.spawn(run_storage_monitor( - db_path.into(), - Arc::downgrade(&stats), - encryption_config, - )); - - tracing::debug!("done sending stats, and creating bg tasks"); - - Ok(stats) -} - #[derive(Default)] pub enum RestoreOption { /// Restore database state from the most recent version found in a backup. @@ -495,189 +140,3 @@ pub enum RestoreOption { /// Granularity depends of how frequently WAL log pages are being snapshotted. 
PointInTime(NaiveDateTime), } - -const WASM_TABLE_CREATE: &str = - "CREATE TABLE libsql_wasm_func_table (name text PRIMARY KEY, body text) WITHOUT ROWID;"; - -async fn load_dump(dump: S, conn: PrimaryConnection) -> crate::Result<(), LoadDumpError> -where - S: Stream> + Unpin, -{ - let mut reader = tokio::io::BufReader::new(StreamReader::new(dump)); - let mut curr = String::new(); - let mut line = String::new(); - let mut skipped_wasm_table = false; - let mut n_stmt = 0; - let mut line_id = 0; - - while let Ok(n) = reader.read_line(&mut curr).await { - line_id += 1; - if n == 0 { - break; - } - let trimmed = curr.trim(); - if trimmed.is_empty() || trimmed.starts_with("--") { - curr.clear(); - continue; - } - // FIXME: it's well known bug that comment ending with semicolon will be handled incorrectly by currend dump processing code - let statement_end = trimmed.ends_with(';'); - - // we want to concat original(non-trimmed) lines as trimming will join all them in one - // single-line statement which is incorrect if comments in the end are present - line.push_str(&curr); - curr.clear(); - - // This is a hack to ignore the libsql_wasm_func_table table because it is already created - // by the system. 
- if !skipped_wasm_table && line.trim() == WASM_TABLE_CREATE { - skipped_wasm_table = true; - line.clear(); - continue; - } - - if statement_end { - n_stmt += 1; - // dump must be performd within a txn - if n_stmt > 2 && conn.is_autocommit().await.unwrap() { - return Err(LoadDumpError::NoTxn); - } - - line = tokio::task::spawn_blocking({ - let conn = conn.clone(); - move || -> crate::Result { - conn.with_raw(|conn| conn.execute(&line, ())).map_err(|e| { - LoadDumpError::Internal(format!("line: {}, error: {}", line_id, e)) - })?; - Ok(line) - } - }) - .await??; - line.clear(); - } else { - line.push(' '); - } - } - tracing::debug!("loaded {} lines from dump", line_id); - - if !conn.is_autocommit().await.unwrap() { - tokio::task::spawn_blocking({ - let conn = conn.clone(); - move || -> crate::Result<(), LoadDumpError> { - conn.with_raw(|conn| conn.execute("rollback", ()))?; - Ok(()) - } - }) - .await??; - return Err(LoadDumpError::NoCommit); - } - - Ok(()) -} - -pub async fn init_bottomless_replicator( - path: impl AsRef, - options: bottomless::replicator::Options, - restore_option: &RestoreOption, -) -> anyhow::Result<(bottomless::replicator::Replicator, bool)> { - tracing::debug!("Initializing bottomless replication"); - let path = path - .as_ref() - .to_str() - .ok_or_else(|| anyhow::anyhow!("Invalid db path"))? - .to_owned(); - let mut replicator = bottomless::replicator::Replicator::with_options(path, options).await?; - - let (generation, timestamp) = match restore_option { - RestoreOption::Latest | RestoreOption::Dump(_) => (None, None), - RestoreOption::Generation(generation) => (Some(*generation), None), - RestoreOption::PointInTime(timestamp) => (None, Some(*timestamp)), - }; - - let (action, did_recover) = replicator.restore(generation, timestamp).await?; - match action { - bottomless::replicator::RestoreAction::SnapshotMainDbFile => { - replicator.new_generation().await; - if let Some(_handle) = replicator.snapshot_main_db_file(true).await? 
{ - tracing::trace!("got snapshot handle after restore with generation upgrade"); - } - // Restoration process only leaves the local WAL file if it was - // detected to be newer than its remote counterpart. - replicator.maybe_replicate_wal().await? - } - bottomless::replicator::RestoreAction::ReuseGeneration(gen) => { - replicator.set_generation(gen); - } - } - - Ok((replicator, did_recover)) -} - -async fn run_periodic_compactions(logger: Arc) -> anyhow::Result<()> { - // calling `ReplicationLogger::maybe_compact()` is cheap if the compaction does not actually - // take place, so we can afford to poll it very often for simplicity - let mut interval = tokio::time::interval(tokio::time::Duration::from_millis(1000)); - interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Delay); - - loop { - interval.tick().await; - let handle = BLOCKING_RT.spawn_blocking(enclose! {(logger) move || { - logger.maybe_compact() - }}); - handle - .await - .expect("Compaction task crashed") - .context("Compaction failed")?; - } -} - -fn check_fresh_db(path: &Path) -> crate::Result { - let is_fresh = !path.join("wallog").try_exists()?; - Ok(is_fresh) -} - -// Periodically check the storage used by the database and save it in the Stats structure. -// TODO: Once we have a separate fiber that does WAL checkpoints, running this routine -// right after checkpointing is exactly where it should be done. 
-async fn run_storage_monitor( - db_path: PathBuf, - stats: Weak, - encryption_config: Option, -) -> anyhow::Result<()> { - // on initialization, the database file doesn't exist yet, so we wait a bit for it to be - // created - tokio::time::sleep(Duration::from_secs(1)).await; - - let duration = tokio::time::Duration::from_secs(60); - let db_path: Arc = db_path.into(); - loop { - let db_path = db_path.clone(); - let Some(stats) = stats.upgrade() else { - return Ok(()); - }; - - let encryption_config = encryption_config.clone(); - let _ = tokio::task::spawn_blocking(move || { - // because closing the last connection interferes with opening a new one, we lazily - // initialize a connection here, and keep it alive for the entirety of the program. If we - // fail to open it, we wait for `duration` and try again later. - match open_conn(&db_path, Sqlite3WalManager::new(), Some(rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY), encryption_config) { - Ok(mut conn) => { - if let Ok(tx) = conn.transaction() { - let page_count = tx.query_row("pragma page_count;", [], |row| { row.get::(0) }); - let freelist_count = tx.query_row("pragma freelist_count;", [], |row| { row.get::(0) }); - if let (Ok(page_count), Ok(freelist_count)) = (page_count, freelist_count) { - let storage_bytes_used = (page_count - freelist_count) * 4096; - stats.set_storage_bytes_used(storage_bytes_used); - } - } - }, - Err(e) => { - tracing::warn!("failed to open connection for storager monitor: {e}, trying again in {duration:?}"); - }, - } - }).await; - - tokio::time::sleep(duration).await; - } -} diff --git a/libsql-server/src/namespace/store.rs b/libsql-server/src/namespace/store.rs index fbce8cd78b..a78e4f59b0 100644 --- a/libsql-server/src/namespace/store.rs +++ b/libsql-server/src/namespace/store.rs @@ -20,10 +20,10 @@ use crate::namespace::{NamespaceBottomlessDbId, NamespaceBottomlessDbIdInit, Nam use crate::stats::Stats; use super::broadcasters::{BroadcasterHandle, BroadcasterRegistry}; -use 
super::configurator::NamespaceConfigurators; +use super::configurator::{DynConfigurator, NamespaceConfigurators}; use super::meta_store::{MetaStore, MetaStoreHandle}; use super::schema_lock::SchemaLocksRegistry; -use super::{Namespace, NamespaceConfig, ResetCb, ResetOp, ResolveNamespacePathFn, RestoreOption}; +use super::{Namespace, ResetCb, ResetOp, ResolveNamespacePathFn, RestoreOption}; type NamespaceEntry = Arc>>; @@ -46,10 +46,10 @@ pub struct NamespaceStoreInner { allow_lazy_creation: bool, has_shutdown: AtomicBool, snapshot_at_shutdown: bool, - pub config: NamespaceConfig, schema_locks: SchemaLocksRegistry, broadcasters: BroadcasterRegistry, configurators: NamespaceConfigurators, + db_kind: DatabaseKind, } impl NamespaceStore { @@ -57,9 +57,9 @@ impl NamespaceStore { allow_lazy_creation: bool, snapshot_at_shutdown: bool, max_active_namespaces: usize, - config: NamespaceConfig, metadata: MetaStore, configurators: NamespaceConfigurators, + db_kind: DatabaseKind, ) -> crate::Result { tracing::trace!("Max active namespaces: {max_active_namespaces}"); let store = Cache::::builder() @@ -91,10 +91,10 @@ impl NamespaceStore { allow_lazy_creation, has_shutdown: AtomicBool::new(false), snapshot_at_shutdown, - config, schema_locks: Default::default(), broadcasters: Default::default(), configurators, + db_kind, }), }) } @@ -132,14 +132,8 @@ impl NamespaceStore { } } - Namespace::cleanup( - &self.inner.config, - &namespace, - &db_config, - prune_all, - bottomless_db_id_init, - ) - .await?; + self.cleanup(&namespace, &db_config, prune_all, bottomless_db_id_init) + .await?; tracing::info!("destroyed namespace: {namespace}"); @@ -181,15 +175,16 @@ impl NamespaceStore { let db_config = self.inner.metadata.handle(namespace.clone()); // destroy on-disk database - Namespace::cleanup( - &self.inner.config, + self.cleanup( &namespace, &db_config.get(), false, NamespaceBottomlessDbIdInit::FetchFromConfig, ) .await?; - let ns = self.make_namespace(&namespace, db_config, 
restore_option).await?; + let ns = self + .make_namespace(&namespace, db_config, restore_option) + .await?; lock.replace(ns); @@ -289,16 +284,17 @@ impl NamespaceStore { handle .store_and_maybe_flush(Some(to_config.into()), false) .await?; - let to_ns = Namespace::fork( - &self.inner.config, - from_ns, - from_config, - to.clone(), - handle.clone(), - timestamp, - self.clone(), - ) - .await?; + let to_ns = self + .get_configurator(&from_config.get()) + .fork( + from_ns, + from_config, + to.clone(), + handle.clone(), + timestamp, + self.clone(), + ) + .await?; to_lock.replace(to_ns); handle.flush().await?; @@ -377,23 +373,18 @@ impl NamespaceStore { config: MetaStoreHandle, restore_option: RestoreOption, ) -> crate::Result { - let configurator = match self.inner.config.db_kind { - DatabaseKind::Primary if config.get().is_shared_schema => { - self.inner.configurators.configure_schema()? - } - DatabaseKind::Primary => self.inner.configurators.configure_primary()?, - DatabaseKind::Replica => self.inner.configurators.configure_replica()?, - }; - let ns = configurator.setup( - &self.inner.config, - config, - restore_option, - namespace, - self.make_reset_cb(), - self.resolve_attach_fn(), - self.clone(), - self.broadcaster(namespace.clone()), - ).await?; + let ns = self + .get_configurator(&config.get()) + .setup( + config, + restore_option, + namespace, + self.make_reset_cb(), + self.resolve_attach_fn(), + self.clone(), + self.broadcaster(namespace.clone()), + ) + .await?; Ok(ns) } @@ -405,7 +396,9 @@ impl NamespaceStore { restore_option: RestoreOption, ) -> crate::Result { let init = async { - let ns = self.make_namespace(namespace, db_config, restore_option).await?; + let ns = self + .make_namespace(namespace, db_config, restore_option) + .await?; Ok(Some(ns)) }; @@ -521,4 +514,26 @@ impl NamespaceStore { pub(crate) fn schema_locks(&self) -> &SchemaLocksRegistry { &self.inner.schema_locks } + + fn get_configurator(&self, db_config: &DatabaseConfig) -> &DynConfigurator { 
+ match self.inner.db_kind { + DatabaseKind::Primary if db_config.is_shared_schema => { + self.inner.configurators.configure_schema().unwrap() + } + DatabaseKind::Primary => self.inner.configurators.configure_primary().unwrap(), + DatabaseKind::Replica => self.inner.configurators.configure_replica().unwrap(), + } + } + + async fn cleanup( + &self, + namespace: &NamespaceName, + db_config: &DatabaseConfig, + prune_all: bool, + bottomless_db_id_init: NamespaceBottomlessDbIdInit, + ) -> crate::Result<()> { + self.get_configurator(db_config) + .cleanup(namespace, db_config, prune_all, bottomless_db_id_init) + .await + } } diff --git a/libsql-server/src/schema/scheduler.rs b/libsql-server/src/schema/scheduler.rs index 17ce655064..a8195cbbd0 100644 --- a/libsql-server/src/schema/scheduler.rs +++ b/libsql-server/src/schema/scheduler.rs @@ -809,10 +809,11 @@ mod test { use crate::connection::config::DatabaseConfig; use crate::database::DatabaseKind; use crate::namespace::configurator::{ - NamespaceConfigurators, PrimaryConfigurator, SchemaConfigurator, + BaseNamespaceConfig, NamespaceConfigurators, PrimaryConfigurator, PrimaryExtraConfig, + SchemaConfigurator, }; use crate::namespace::meta_store::{metastore_connection_maker, MetaStore}; - use crate::namespace::{NamespaceConfig, RestoreOption}; + use crate::namespace::RestoreOption; use crate::schema::SchedulerHandle; use super::super::migration::has_pending_migration_task; @@ -833,9 +834,9 @@ mod test { false, false, 10, - config, meta_store, - NamespaceConfigurators::default(), + config, + DatabaseKind::Primary ) .await .unwrap(); @@ -912,27 +913,41 @@ mod test { assert!(!block_write.load(std::sync::atomic::Ordering::Relaxed)); } - fn make_config(migration_scheduler: SchedulerHandle, path: &Path) -> NamespaceConfig { - NamespaceConfig { - db_kind: DatabaseKind::Primary, + fn make_config(migration_scheduler: SchedulerHandle, path: &Path) -> NamespaceConfigurators { + let mut configurators = NamespaceConfigurators::empty(); 
+ let base_config = BaseNamespaceConfig { base_path: path.to_path_buf().into(), - max_log_size: 1000000000, - max_log_duration: None, extensions: Arc::new([]), stats_sender: tokio::sync::mpsc::channel(1).0, max_response_size: 100000000000000, max_total_response_size: 100000000000, - checkpoint_interval: None, max_concurrent_connections: Arc::new(Semaphore::new(10)), max_concurrent_requests: 10000, - encryption_config: None, - channel: None, - uri: None, + }; + + let primary_config = PrimaryExtraConfig { + max_log_size: 1000000000, + max_log_duration: None, bottomless_replication: None, scripted_backup: None, + checkpoint_interval: None, + }; + + let make_wal_manager = Arc::new(|| EitherWAL::A(Sqlite3WalManager::default())); + + configurators.with_schema(SchemaConfigurator::new( + base_config.clone(), + primary_config.clone(), + make_wal_manager.clone(), migration_scheduler, - make_wal_manager: Arc::new(|| EitherWAL::A(Sqlite3WalManager::default())), - } + )); + configurators.with_primary(PrimaryConfigurator::new( + base_config, + primary_config, + make_wal_manager.clone(), + )); + + configurators } #[tokio::test] @@ -950,9 +965,9 @@ mod test { false, false, 10, - config, meta_store, - NamespaceConfigurators::default(), + config, + DatabaseKind::Primary ) .await .unwrap(); @@ -1029,9 +1044,16 @@ mod test { .unwrap(); let (sender, _receiver) = mpsc::channel(100); let config = make_config(sender.clone().into(), tmp.path()); - let store = NamespaceStore::new(false, false, 10, config, meta_store, NamespaceConfigurators::default()) - .await - .unwrap(); + let store = NamespaceStore::new( + false, + false, + 10, + meta_store, + config, + DatabaseKind::Primary, + ) + .await + .unwrap(); store .with("ns".into(), |ns| { @@ -1056,10 +1078,7 @@ mod test { .unwrap(); let (sender, mut receiver) = mpsc::channel(100); let config = make_config(sender.clone().into(), tmp.path()); - let configurators = NamespaceConfigurators::default() - .with_schema(SchemaConfigurator) - 
.with_primary(PrimaryConfigurator); - let store = NamespaceStore::new(false, false, 10, config, meta_store, configurators) + let store = NamespaceStore::new(false, false, 10, meta_store, config, DatabaseKind::Primary) .await .unwrap(); let mut scheduler = Scheduler::new(store.clone(), maker().unwrap()) @@ -1132,9 +1151,16 @@ mod test { .unwrap(); let (sender, _receiver) = mpsc::channel(100); let config = make_config(sender.clone().into(), tmp.path()); - let store = NamespaceStore::new(false, false, 10, config, meta_store, NamespaceConfigurators::default()) - .await - .unwrap(); + let store = NamespaceStore::new( + false, + false, + 10, + meta_store, + config, + DatabaseKind::Primary + ) + .await + .unwrap(); let scheduler = Scheduler::new(store.clone(), maker().unwrap()) .await .unwrap(); From 0647711dd81736bcb1fa1f886b3076736becc4a9 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Tue, 6 Aug 2024 08:30:53 +0200 Subject: [PATCH 09/50] legacy configurators --- libsql-server/src/http/admin/stats.rs | 2 + libsql-server/src/lib.rs | 425 +++++++++++++++----------- libsql-server/src/namespace/store.rs | 13 +- libsql-server/tests/cluster/mod.rs | 29 +- 4 files changed, 279 insertions(+), 190 deletions(-) diff --git a/libsql-server/src/http/admin/stats.rs b/libsql-server/src/http/admin/stats.rs index f2948d4d7b..5fce92ba0a 100644 --- a/libsql-server/src/http/admin/stats.rs +++ b/libsql-server/src/http/admin/stats.rs @@ -140,10 +140,12 @@ pub(super) async fn handle_stats( State(app_state): State>>, Path(namespace): Path, ) -> crate::Result> { + dbg!(); let stats = app_state .namespaces .stats(NamespaceName::from_string(namespace)?) 
.await?; + dbg!(); let resp: StatsResponse = stats.as_ref().into(); Ok(Json(resp)) diff --git a/libsql-server/src/lib.rs b/libsql-server/src/lib.rs index 8bd3ea4fac..4188365e03 100644 --- a/libsql-server/src/lib.rs +++ b/libsql-server/src/lib.rs @@ -4,7 +4,6 @@ use std::alloc::Layout; use std::ffi::c_void; use std::mem::{align_of, size_of}; use std::path::{Path, PathBuf}; -use std::pin::Pin; use std::str::FromStr; use std::sync::{Arc, Weak}; @@ -29,10 +28,10 @@ use auth::Auth; use config::{ AdminApiConfig, DbConfig, HeartbeatConfig, RpcClientConfig, RpcServerConfig, UserApiConfig, }; -use futures::future::ready; use futures::Future; use http::user::UserApi; use hyper::client::HttpConnector; +use hyper::Uri; use hyper_rustls::HttpsConnector; #[cfg(feature = "durable-wal")] use libsql_storage::{DurableWalManager, LockManager}; @@ -41,10 +40,6 @@ use libsql_sys::wal::either::Either as EitherWAL; #[cfg(feature = "durable-wal")] use libsql_sys::wal::either::Either3 as EitherWAL; use libsql_sys::wal::Sqlite3WalManager; -use libsql_wal::checkpointer::LibsqlCheckpointer; -use libsql_wal::registry::WalRegistry; -use libsql_wal::storage::NoStorage; -use libsql_wal::wal::LibsqlWalManager; use namespace::meta_store::MetaStoreHandle; use namespace::NamespaceName; use net::Connector; @@ -55,15 +50,19 @@ use tokio::runtime::Runtime; use tokio::sync::{mpsc, Notify, Semaphore}; use tokio::task::JoinSet; use tokio::time::Duration; +use tonic::transport::Channel; use url::Url; use utils::services::idle_shutdown::IdleShutdownKicker; use self::config::MetaStoreConfig; -use self::connection::connection_manager::InnerWalManager; -use self::namespace::configurator::{BaseNamespaceConfig, NamespaceConfigurators, PrimaryConfigurator, PrimaryExtraConfig, ReplicaConfigurator, SchemaConfigurator}; +use self::namespace::configurator::{ + BaseNamespaceConfig, NamespaceConfigurators, PrimaryConfigurator, PrimaryExtraConfig, + ReplicaConfigurator, SchemaConfigurator, +}; use 
self::namespace::NamespaceStore; use self::net::AddrIncoming; use self::replication::script_backup_manager::{CommandHandler, ScriptBackupManager}; +use self::schema::SchedulerHandle; pub mod auth; mod broadcaster; @@ -424,33 +423,44 @@ where let extensions = self.db_config.validate_extensions()?; let user_auth_strategy = self.user_api_config.auth_strategy.clone(); - let service_shutdown = Arc::new(Notify::new()); - let scripted_backup = match self.db_config.snapshot_exec { Some(ref command) => { let (scripted_backup, script_backup_task) = ScriptBackupManager::new(&self.path, CommandHandler::new(command.to_string())) .await?; - join_set.spawn(script_backup_task.run()); + self.spawn_until_shutdown(&mut join_set, script_backup_task.run()); Some(scripted_backup) } None => None, }; - let (channel, uri) = match self.rpc_client_config { - Some(ref config) => { - let (channel, uri) = config.configure().await?; - (Some(channel), Some(uri)) - } - None => (None, None), + let db_kind = match self.rpc_client_config { + Some(_) => DatabaseKind::Replica, + _ => DatabaseKind::Primary, }; + let client_config = self.get_client_config().await?; let (scheduler_sender, scheduler_receiver) = mpsc::channel(128); - let (stats_sender, stats_receiver) = mpsc::channel(1024); - // chose the wal backend - let (make_wal_manager, registry_shutdown) = self.configure_wal_manager(&mut join_set)?; + let base_config = BaseNamespaceConfig { + base_path: self.path.clone(), + extensions, + stats_sender, + max_response_size: self.db_config.max_response_size, + max_total_response_size: self.db_config.max_total_response_size, + max_concurrent_connections: Arc::new(Semaphore::new(self.max_concurrent_connections)), + max_concurrent_requests: self.db_config.max_concurrent_requests, + }; + + let configurators = self + .make_configurators( + base_config, + scripted_backup, + scheduler_sender.into(), + client_config.clone(), + ) + .await?; let (metastore_conn_maker, meta_store_wal_manager) = 
metastore_connection_maker(self.meta_store_config.bottomless.clone(), &self.path) @@ -464,60 +474,6 @@ where ) .await?; - let base_config = BaseNamespaceConfig { - base_path: self.path.clone(), - extensions, - stats_sender, - max_response_size: self.db_config.max_response_size, - max_total_response_size: self.db_config.max_total_response_size, - max_concurrent_connections: Arc::new(Semaphore::new(self.max_concurrent_connections)), - max_concurrent_requests: self.db_config.max_concurrent_requests, - }; - - let mut configurators = NamespaceConfigurators::default(); - - let db_kind = match channel.clone().zip(uri.clone()) { - // replica mode - Some((channel, uri)) => { - let replica_configurator = ReplicaConfigurator::new( - base_config, - channel, - uri, - make_wal_manager, - ); - configurators.with_replica(replica_configurator); - DatabaseKind::Replica - } - // primary mode - None => { - let primary_config = PrimaryExtraConfig { - max_log_size: self.db_config.max_log_size, - max_log_duration: self.db_config.max_log_duration.map(Duration::from_secs_f32), - bottomless_replication: self.db_config.bottomless_replication.clone(), - scripted_backup, - checkpoint_interval: self.db_config.checkpoint_interval, - }; - - let primary_configurator = PrimaryConfigurator::new( - base_config.clone(), - primary_config.clone(), - make_wal_manager.clone(), - ); - - let schema_configurator = SchemaConfigurator::new( - base_config.clone(), - primary_config, - make_wal_manager.clone(), - scheduler_sender.into(), - ); - - configurators.with_schema(schema_configurator); - configurators.with_primary(primary_configurator); - - DatabaseKind::Primary - }, - }; - let namespace_store: NamespaceStore = NamespaceStore::new( db_kind.is_replica(), self.db_config.snapshot_at_shutdown, @@ -528,27 +484,9 @@ where ) .await?; - let meta_conn = metastore_conn_maker()?; - let scheduler = Scheduler::new(namespace_store.clone(), meta_conn).await?; - - join_set.spawn(async move { - 
scheduler.run(scheduler_receiver).await; - Ok(()) - }); self.spawn_monitoring_tasks(&mut join_set, stats_receiver)?; - // eagerly load the default namespace when namespaces are disabled - if self.disable_namespaces && db_kind.is_primary() { - namespace_store - .create( - NamespaceName::default(), - namespace::RestoreOption::Latest, - Default::default(), - ) - .await?; - } - // if namespaces are enabled, then bottomless must have set DB ID if !self.disable_namespaces { if let Some(bottomless) = &self.db_config.bottomless_replication { @@ -563,7 +501,7 @@ where let proxy_service = ProxyService::new(namespace_store.clone(), None, self.disable_namespaces); // Garbage collect proxy clients every 30 seconds - join_set.spawn({ + self.spawn_until_shutdown(&mut join_set, { let clients = proxy_service.clients(); async move { loop { @@ -572,7 +510,8 @@ where } } }); - join_set.spawn(run_rpc_server( + + self.spawn_until_shutdown(&mut join_set, run_rpc_server( proxy_service, config.acceptor, config.tls_config, @@ -584,9 +523,28 @@ where let shutdown_timeout = self.shutdown_timeout.clone(); let shutdown = self.shutdown.clone(); + let service_shutdown = Arc::new(Notify::new()); // setup user-facing rpc services match db_kind { DatabaseKind::Primary => { + // The migration scheduler is only useful on the primary + let meta_conn = metastore_conn_maker()?; + let scheduler = Scheduler::new(namespace_store.clone(), meta_conn).await?; + self.spawn_until_shutdown(&mut join_set, async move { + scheduler.run(scheduler_receiver).await; + Ok(()) + }); + + if self.disable_namespaces { + namespace_store + .create( + NamespaceName::default(), + namespace::RestoreOption::Latest, + Default::default(), + ) + .await?; + } + let replication_svc = ReplicationLogService::new( namespace_store.clone(), idle_shutdown_kicker.clone(), @@ -602,7 +560,7 @@ where ); // Garbage collect proxy clients every 30 seconds - join_set.spawn({ + self.spawn_until_shutdown(&mut join_set, { let clients = 
proxy_svc.clients(); async move { loop { @@ -623,16 +581,19 @@ where .configure(&mut join_set); } DatabaseKind::Replica => { + dbg!(); + let (channel, uri) = client_config.clone().unwrap(); let replication_svc = - ReplicationLogProxyService::new(channel.clone().unwrap(), uri.clone().unwrap()); + ReplicationLogProxyService::new(channel.clone(), uri.clone()); let proxy_svc = ReplicaProxyService::new( - channel.clone().unwrap(), - uri.clone().unwrap(), + channel, + uri, namespace_store.clone(), user_auth_strategy.clone(), self.disable_namespaces, ); + dbg!(); self.make_services( namespace_store.clone(), idle_shutdown_kicker, @@ -642,6 +603,7 @@ where service_shutdown.clone(), ) .configure(&mut join_set); + dbg!(); } }; @@ -651,7 +613,6 @@ where join_set.shutdown().await; service_shutdown.notify_waiters(); namespace_store.shutdown().await?; - registry_shutdown.await?; Ok::<_, crate::Error>(()) }; @@ -680,100 +641,200 @@ where Ok(()) } - fn setup_shutdown(&self) -> Option { - let shutdown_notify = self.shutdown.clone(); - self.idle_shutdown_timeout.map(|d| { - IdleShutdownKicker::new(d, self.initial_idle_shutdown_timeout, shutdown_notify) - }) - } - - fn configure_wal_manager( + async fn make_configurators( &self, - join_set: &mut JoinSet>, - ) -> anyhow::Result<( - Arc InnerWalManager + Sync + Send + 'static>, - Pin> + Send + Sync + 'static>>, - )> { - let wal_path = self.path.join("wals"); - let enable_libsql_wal_test = { - let is_primary = self.rpc_server_config.is_some(); - let is_libsql_wal_test = std::env::var("LIBSQL_WAL_TEST").is_ok(); - is_primary && is_libsql_wal_test - }; - let use_libsql_wal = - self.use_custom_wal == Some(CustomWAL::LibsqlWal) || enable_libsql_wal_test; - if !use_libsql_wal { - if wal_path.try_exists()? 
{ - anyhow::bail!("database was previously setup to use libsql-wal"); - } - } - - if self.use_custom_wal.is_some() { - if self.db_config.bottomless_replication.is_some() { - anyhow::bail!("bottomless not supported with custom WAL"); - } - if self.rpc_client_config.is_some() { - anyhow::bail!("custom WAL not supported in replica mode"); + base_config: BaseNamespaceConfig, + scripted_backup: Option, + migration_scheduler_handle: SchedulerHandle, + client_config: Option<(Channel, Uri)>, + ) -> anyhow::Result { + match self.use_custom_wal { + Some(CustomWAL::LibsqlWal) => self.libsql_wal_configurators(), + #[cfg(feature = "durable-wal")] + Some(CustomWAL::DurableWal) => self.durable_wal_configurators(), + None => { + self.legacy_configurators( + base_config, + scripted_backup, + migration_scheduler_handle, + client_config, + ) + .await } } + } - let namespace_resolver = |path: &Path| { - NamespaceName::from_string( - path.parent() - .unwrap() - .file_name() - .unwrap() - .to_str() - .unwrap() - .to_string(), - ) - .unwrap() - .into() - }; - - match self.use_custom_wal { - Some(CustomWAL::LibsqlWal) => { - let (sender, receiver) = tokio::sync::mpsc::channel(64); - let registry = Arc::new(WalRegistry::new(wal_path, NoStorage, sender)?); - let checkpointer = LibsqlCheckpointer::new(registry.clone(), receiver, 8); - join_set.spawn(async move { - checkpointer.run().await; - Ok(()) - }); + fn libsql_wal_configurators(&self) -> anyhow::Result { + todo!() + } - let wal = LibsqlWalManager::new(registry.clone(), Arc::new(namespace_resolver)); - let shutdown_notify = self.shutdown.clone(); - let shutdown_fut = Box::pin(async move { - shutdown_notify.notified().await; - registry.shutdown().await?; - Ok(()) - }); + #[cfg(feature = "durable-wal")] + fn durable_wal_configurators(&self) -> anyhow::Result { + todo!(); + } - tracing::info!("using libsql wal"); - Ok((Arc::new(move || EitherWAL::B(wal.clone())), shutdown_fut)) + fn spawn_until_shutdown(&self, join_set: &mut JoinSet>, fut: 
F) + where + F: Future> + Send + 'static, + { + let shutdown = self.shutdown.clone(); + join_set.spawn(async move { + tokio::select! { + _ = shutdown.notified() => Ok(()), + ret = fut => ret } - #[cfg(feature = "durable-wal")] - Some(CustomWAL::DurableWal) => { - tracing::info!("using durable wal"); - let lock_manager = Arc::new(std::sync::Mutex::new(LockManager::new())); - let wal = DurableWalManager::new( - lock_manager, - namespace_resolver, - self.storage_server_address.clone(), - ); - Ok(( - Arc::new(move || EitherWAL::C(wal.clone())), - Box::pin(ready(Ok(()))), - )) + }); + } + + async fn legacy_configurators( + &self, + base_config: BaseNamespaceConfig, + scripted_backup: Option, + migration_scheduler_handle: SchedulerHandle, + client_config: Option<(Channel, Uri)>, + ) -> anyhow::Result { + let make_wal_manager = Arc::new(|| EitherWAL::A(Sqlite3WalManager::default())); + let mut configurators = NamespaceConfigurators::empty(); + + match client_config { + // replica mode + Some((channel, uri)) => { + let replica_configurator = + ReplicaConfigurator::new(base_config, channel, uri, make_wal_manager); + configurators.with_replica(replica_configurator); } + // primary mode None => { - tracing::info!("using sqlite3 wal"); - Ok(( - Arc::new(|| EitherWAL::A(Sqlite3WalManager::default())), - Box::pin(ready(Ok(()))), - )) + let primary_config = PrimaryExtraConfig { + max_log_size: self.db_config.max_log_size, + max_log_duration: self.db_config.max_log_duration.map(Duration::from_secs_f32), + bottomless_replication: self.db_config.bottomless_replication.clone(), + scripted_backup, + checkpoint_interval: self.db_config.checkpoint_interval, + }; + + let primary_configurator = PrimaryConfigurator::new( + base_config.clone(), + primary_config.clone(), + make_wal_manager.clone(), + ); + + let schema_configurator = SchemaConfigurator::new( + base_config.clone(), + primary_config, + make_wal_manager.clone(), + migration_scheduler_handle, + ); + + 
configurators.with_schema(schema_configurator); + configurators.with_primary(primary_configurator); } } + + Ok(configurators) + } + + fn setup_shutdown(&self) -> Option { + let shutdown_notify = self.shutdown.clone(); + self.idle_shutdown_timeout.map(|d| { + IdleShutdownKicker::new(d, self.initial_idle_shutdown_timeout, shutdown_notify) + }) + } + + // fn configure_wal_manager( + // &self, + // join_set: &mut JoinSet>, + // ) -> anyhow::Result<( + // Arc InnerWalManager + Sync + Send + 'static>, + // Pin> + Send + Sync + 'static>>, + // )> { + // let wal_path = self.path.join("wals"); + // let enable_libsql_wal_test = { + // let is_primary = self.rpc_server_config.is_some(); + // let is_libsql_wal_test = std::env::var("LIBSQL_WAL_TEST").is_ok(); + // is_primary && is_libsql_wal_test + // }; + // let use_libsql_wal = + // self.use_custom_wal == Some(CustomWAL::LibsqlWal) || enable_libsql_wal_test; + // if !use_libsql_wal { + // if wal_path.try_exists()? { + // anyhow::bail!("database was previously setup to use libsql-wal"); + // } + // } + // + // if self.use_custom_wal.is_some() { + // if self.db_config.bottomless_replication.is_some() { + // anyhow::bail!("bottomless not supported with custom WAL"); + // } + // if self.rpc_client_config.is_some() { + // anyhow::bail!("custom WAL not supported in replica mode"); + // } + // } + // + // let namespace_resolver = |path: &Path| { + // NamespaceName::from_string( + // path.parent() + // .unwrap() + // .file_name() + // .unwrap() + // .to_str() + // .unwrap() + // .to_string(), + // ) + // .unwrap() + // .into() + // }; + // + // match self.use_custom_wal { + // Some(CustomWAL::LibsqlWal) => { + // let (sender, receiver) = tokio::sync::mpsc::channel(64); + // let registry = Arc::new(WalRegistry::new(wal_path, NoStorage, sender)?); + // let checkpointer = LibsqlCheckpointer::new(registry.clone(), receiver, 8); + // join_set.spawn(async move { + // checkpointer.run().await; + // Ok(()) + // }); + // + // let wal = 
LibsqlWalManager::new(registry.clone(), Arc::new(namespace_resolver)); + // let shutdown_notify = self.shutdown.clone(); + // let shutdown_fut = Box::pin(async move { + // shutdown_notify.notified().await; + // registry.shutdown().await?; + // Ok(()) + // }); + // + // tracing::info!("using libsql wal"); + // Ok((Arc::new(move || EitherWAL::B(wal.clone())), shutdown_fut)) + // } + // #[cfg(feature = "durable-wal")] + // Some(CustomWAL::DurableWal) => { + // tracing::info!("using durable wal"); + // let lock_manager = Arc::new(std::sync::Mutex::new(LockManager::new())); + // let wal = DurableWalManager::new( + // lock_manager, + // namespace_resolver, + // self.storage_server_address.clone(), + // ); + // Ok(( + // Arc::new(move || EitherWAL::C(wal.clone())), + // Box::pin(ready(Ok(()))), + // )) + // } + // None => { + // tracing::info!("using sqlite3 wal"); + // Ok(( + // Arc::new(|| EitherWAL::A(Sqlite3WalManager::default())), + // Box::pin(ready(Ok(()))), + // )) + // } + // } + // } + + async fn get_client_config(&self) -> anyhow::Result> { + match self.rpc_client_config { + Some(ref config) => Ok(Some(config.configure().await?)), + None => Ok(None), + } } } diff --git a/libsql-server/src/namespace/store.rs b/libsql-server/src/namespace/store.rs index a78e4f59b0..b2b5d33032 100644 --- a/libsql-server/src/namespace/store.rs +++ b/libsql-server/src/namespace/store.rs @@ -327,6 +327,7 @@ impl NamespaceStore { where Fun: FnOnce(&Namespace) -> R, { + dbg!(); if namespace != NamespaceName::default() && !self.inner.metadata.exists(&namespace) && !self.inner.allow_lazy_creation @@ -334,6 +335,7 @@ impl NamespaceStore { return Err(Error::NamespaceDoesntExist(namespace.to_string())); } + dbg!(); let f = { let name = namespace.clone(); move |ns: NamespaceEntry| async move { @@ -346,7 +348,9 @@ impl NamespaceStore { } }; + dbg!(); let handle = self.inner.metadata.handle(namespace.to_owned()); + dbg!(); f(self .load_namespace(&namespace, handle, RestoreOption::Latest) 
.await?) @@ -373,6 +377,7 @@ impl NamespaceStore { config: MetaStoreHandle, restore_option: RestoreOption, ) -> crate::Result { + dbg!(); let ns = self .get_configurator(&config.get()) .setup( @@ -386,6 +391,7 @@ impl NamespaceStore { ) .await?; + dbg!(); Ok(ns) } @@ -395,13 +401,17 @@ impl NamespaceStore { db_config: MetaStoreHandle, restore_option: RestoreOption, ) -> crate::Result { + dbg!(); let init = async { + dbg!(); let ns = self .make_namespace(namespace, db_config, restore_option) .await?; + dbg!(); Ok(Some(ns)) }; + dbg!(); let before_load = Instant::now(); let ns = self .inner @@ -410,7 +420,8 @@ impl NamespaceStore { namespace.clone(), init.map_ok(|ns| Arc::new(RwLock::new(ns))), ) - .await?; + .await.map_err(|e| dbg!(e))?; + dbg!(); NAMESPACE_LOAD_LATENCY.record(before_load.elapsed()); Ok(ns) diff --git a/libsql-server/tests/cluster/mod.rs b/libsql-server/tests/cluster/mod.rs index 1171d4a5d0..8f214bd05e 100644 --- a/libsql-server/tests/cluster/mod.rs +++ b/libsql-server/tests/cluster/mod.rs @@ -149,23 +149,29 @@ fn sync_many_replica() { let mut sim = Builder::new() .simulation_duration(Duration::from_secs(1000)) .build(); + dbg!(); make_cluster(&mut sim, NUM_REPLICA, true); + dbg!(); sim.client("client", async { let db = Database::open_remote_with_connector("http://primary:8080", "", TurmoilConnector)?; let conn = db.connect()?; + dbg!(); conn.execute("create table test (x)", ()).await?; + dbg!(); conn.execute("insert into test values (42)", ()).await?; + dbg!(); async fn get_frame_no(url: &str) -> Option { let client = Client::new(); + dbg!(); Some( - client - .get(url) - .await - .unwrap() - .json::() - .await + dbg!(client + .get(url) + .await + .unwrap() + .json::() + .await) .unwrap() .get("replication_index")? 
.as_u64() @@ -173,6 +179,7 @@ fn sync_many_replica() { ) } + dbg!(); let primary_fno = loop { if let Some(fno) = get_frame_no("http://primary:9090/v1/namespaces/default/stats").await { @@ -180,13 +187,15 @@ fn sync_many_replica() { } }; + dbg!(); // wait for all replicas to sync let mut join_set = JoinSet::new(); for i in 0..NUM_REPLICA { join_set.spawn(async move { let uri = format!("http://replica{i}:9090/v1/namespaces/default/stats"); + dbg!(); loop { - if let Some(replica_fno) = get_frame_no(&uri).await { + if let Some(replica_fno) = dbg!(get_frame_no(&uri).await) { if replica_fno == primary_fno { break; } @@ -196,8 +205,10 @@ fn sync_many_replica() { }); } + dbg!(); while join_set.join_next().await.is_some() {} + dbg!(); for i in 0..NUM_REPLICA { let db = Database::open_remote_with_connector( format!("http://replica{i}:8080"), @@ -212,8 +223,10 @@ fn sync_many_replica() { )); } + dbg!(); let client = Client::new(); + dbg!(); let stats = client .get("http://primary:9090/v1/namespaces/default/stats") .await? 
@@ -221,12 +234,14 @@ fn sync_many_replica() { .await .unwrap(); + dbg!(); let stat = stats .get("embedded_replica_frames_replicated") .unwrap() .as_u64() .unwrap(); + dbg!(); assert_eq!(stat, 0); Ok(()) From 76558e8df4d1927e7bd9fee7da4821f1a07a87f6 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Tue, 6 Aug 2024 13:33:23 +0400 Subject: [PATCH 10/50] fix behaviour of VACUUM for vector indices to make rowid consistent between shadow tables and base table --- libsql-sqlite3/src/vacuum.c | 26 ++++++++++++++++++ libsql-sqlite3/src/vectorIndex.c | 28 +------------------- libsql-sqlite3/test/libsql_vector_index.test | 9 +++++-- 3 files changed, 34 insertions(+), 29 deletions(-) diff --git a/libsql-sqlite3/src/vacuum.c b/libsql-sqlite3/src/vacuum.c index c0ae4bc1e1..d927a8d5a6 100644 --- a/libsql-sqlite3/src/vacuum.c +++ b/libsql-sqlite3/src/vacuum.c @@ -17,6 +17,10 @@ #include "sqliteInt.h" #include "vdbeInt.h" +#ifndef SQLITE_OMIT_VECTOR +#include "vectorIndexInt.h" +#endif + #if !defined(SQLITE_OMIT_VACUUM) && !defined(SQLITE_OMIT_ATTACH) /* @@ -294,6 +298,27 @@ SQLITE_NOINLINE int sqlite3RunVacuum( if( rc!=SQLITE_OK ) goto end_of_vacuum; db->init.iDb = 0; +#ifndef SQLITE_OMIT_VECTOR + // shadow tables for vector index will be populated automatically during CREATE INDEX command + // so we must skip them at this step + if( sqlite3FindTable(db, VECTOR_INDEX_GLOBAL_META_TABLE, zDbMain) != NULL ){ + rc = execSqlF(db, pzErrMsg, + "SELECT'INSERT INTO vacuum_db.'||quote(name)" + "||' SELECT*FROM\"%w\".'||quote(name)" + "FROM vacuum_db.sqlite_schema " + "WHERE type='table'AND coalesce(rootpage,1)>0 AND name NOT IN (SELECT name||'_shadow' FROM " VECTOR_INDEX_GLOBAL_META_TABLE ")", + zDbMain + ); + }else{ + rc = execSqlF(db, pzErrMsg, + "SELECT'INSERT INTO vacuum_db.'||quote(name)" + "||' SELECT*FROM\"%w\".'||quote(name)" + "FROM vacuum_db.sqlite_schema " + "WHERE type='table'AND coalesce(rootpage,1)>0 AND name", + zDbMain + ); + } +#else /* Loop through the tables in the 
main database. For each, do ** an "INSERT INTO vacuum_db.xxx SELECT * FROM main.xxx;" to copy ** the contents to the temporary database. @@ -305,6 +330,7 @@ SQLITE_NOINLINE int sqlite3RunVacuum( "WHERE type='table'AND coalesce(rootpage,1)>0", zDbMain ); +#endif assert( (db->mDbFlags & DBFLAG_Vacuum)!=0 ); db->mDbFlags &= ~DBFLAG_Vacuum; if( rc!=SQLITE_OK ) goto end_of_vacuum; diff --git a/libsql-sqlite3/src/vectorIndex.c b/libsql-sqlite3/src/vectorIndex.c index 78266ed462..d520419a41 100644 --- a/libsql-sqlite3/src/vectorIndex.c +++ b/libsql-sqlite3/src/vectorIndex.c @@ -49,11 +49,6 @@ ** VectorIdxParams utilities ****************************************************************************/ -// VACUUM creates tables and indices first and only then populate data -// we need to ignore inserts from 'INSERT INTO vacuum.t SELECT * FROM t' statements because -// all shadow tables will be populated by VACUUM process during regular process of table copy -#define IsVacuum(db) ((db->mDbFlags&DBFLAG_Vacuum)!=0) - void vectorIdxParamsInit(VectorIdxParams *pParams, u8 *pBinBuf, int nBinSize) { assert( nBinSize <= VECTOR_INDEX_PARAMS_BUF_SIZE ); @@ -772,10 +767,6 @@ int vectorIndexDrop(sqlite3 *db, const char *zDbSName, const char *zIdxName) { // this is done to prevent unrecoverable situations where index were dropped but index parameters deletion failed and second attempt will fail on first step int rcIdx, rcParams; - if( IsVacuum(db) ){ - return SQLITE_OK; - } - assert( zDbSName != NULL ); rcIdx = diskAnnDropIndex(db, zDbSName, zIdxName); @@ -786,10 +777,6 @@ int vectorIndexDrop(sqlite3 *db, const char *zDbSName, const char *zIdxName) { int vectorIndexClear(sqlite3 *db, const char *zDbSName, const char *zIdxName) { assert( zDbSName != NULL ); - if( IsVacuum(db) ){ - return SQLITE_OK; - } - return diskAnnClearIndex(db, zDbSName, zIdxName); } @@ -799,7 +786,7 @@ int vectorIndexClear(sqlite3 *db, const char *zDbSName, const char *zIdxName) { * this made intentionally in order to 
natively support upload of SQLite dumps * * dump populates tables first and create indices after - * so we must omit them because shadow tables already filled + * so we must omit index refill setp because shadow tables already filled * * 1. in case of any error :-1 returned (and pParse errMsg is populated with some error message) * 2. if vector index must not be created : 0 returned @@ -817,10 +804,6 @@ int vectorIndexCreate(Parse *pParse, const Index *pIdx, const char *zDbSName, co int hasLibsqlVectorIdxFn = 0, hasCollation = 0; const char *pzErrMsg; - if( IsVacuum(pParse->db) ){ - return CREATE_IGNORE; - } - assert( zDbSName != NULL ); sqlite3 *db = pParse->db; @@ -970,7 +953,6 @@ int vectorIndexSearch( VectorIdxParams idxParams; vectorIdxParamsInit(&idxParams, NULL, 0); - assert( !IsVacuum(db) ); assert( zDbSName != NULL ); if( argc != 3 ){ @@ -1055,10 +1037,6 @@ int vectorIndexInsert( int rc; VectorInRow vectorInRow; - if( IsVacuum(pCur->db) ){ - return SQLITE_OK; - } - rc = vectorInRowAlloc(pCur->db, pRecord, &vectorInRow, pzErrMsg); if( rc != SQLITE_OK ){ return rc; @@ -1078,10 +1056,6 @@ int vectorIndexDelete( ){ VectorInRow payload; - if( IsVacuum(pCur->db) ){ - return SQLITE_OK; - } - payload.pVector = NULL; payload.nKeys = r->nField - 1; payload.pKeyValues = r->aMem + 1; diff --git a/libsql-sqlite3/test/libsql_vector_index.test b/libsql-sqlite3/test/libsql_vector_index.test index 19d31ba19c..7308b2d93f 100644 --- a/libsql-sqlite3/test/libsql_vector_index.test +++ b/libsql-sqlite3/test/libsql_vector_index.test @@ -236,12 +236,17 @@ do_execsql_test vector-attach { do_execsql_test vector-vacuum { CREATE TABLE t_vacuum ( emb FLOAT32(2) ); - INSERT INTO t_vacuum VALUES (vector('[1,2]')), (vector('[3,4]')); + INSERT INTO t_vacuum VALUES (vector('[1,2]')), (vector('[3,4]')), (vector('[5,6]')); CREATE INDEX t_vacuum_idx ON t_vacuum(libsql_vector_idx(emb)); VACUUM; SELECT COUNT(*) FROM t_vacuum; SELECT COUNT(*) FROM t_vacuum_idx_shadow; -} {2 2} + DELETE FROM 
t_vacuum WHERE rowid = 2; + VACUUM; + SELECT * FROM vector_top_k('t_vacuum_idx', vector('[1,2]'), 3); + SELECT * FROM vector_top_k('t_vacuum_idx', vector('[5,6]'), 3); + SELECT * FROM vector_top_k('t_vacuum_idx', vector('[3,4]'), 3); +} {3 3 1 2 2 1 2 1} do_execsql_test vector-many-columns { CREATE TABLE t_many ( i INTEGER PRIMARY KEY, e1 FLOAT32(2), e2 FLOAT32(2) ); From 853143d77b94b26653a87d45134e2b77476096b3 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Tue, 6 Aug 2024 13:48:21 +0400 Subject: [PATCH 11/50] build bundles --- .../SQLite3MultipleCiphers/src/sqlite3.c | 55 ++++++++++--------- libsql-ffi/bundled/bindings/bindgen.rs | 26 ++++++++- libsql-ffi/bundled/src/sqlite3.c | 55 ++++++++++--------- 3 files changed, 79 insertions(+), 57 deletions(-) diff --git a/libsql-ffi/bundled/SQLite3MultipleCiphers/src/sqlite3.c b/libsql-ffi/bundled/SQLite3MultipleCiphers/src/sqlite3.c index c22f35046f..ec692baa53 100644 --- a/libsql-ffi/bundled/SQLite3MultipleCiphers/src/sqlite3.c +++ b/libsql-ffi/bundled/SQLite3MultipleCiphers/src/sqlite3.c @@ -69,6 +69,7 @@ ** src/test2.c ** src/test3.c ** src/test8.c +** src/vacuum.c ** src/vdbe.c ** src/vdbeInt.h ** src/vdbeapi.c @@ -155950,6 +155951,10 @@ SQLITE_PRIVATE void sqlite3UpsertDoUpdate( /* #include "sqliteInt.h" */ /* #include "vdbeInt.h" */ +#ifndef SQLITE_OMIT_VECTOR +/* #include "vectorIndexInt.h" */ +#endif + #if !defined(SQLITE_OMIT_VACUUM) && !defined(SQLITE_OMIT_ATTACH) /* @@ -156227,6 +156232,27 @@ SQLITE_PRIVATE SQLITE_NOINLINE int sqlite3RunVacuum( if( rc!=SQLITE_OK ) goto end_of_vacuum; db->init.iDb = 0; +#ifndef SQLITE_OMIT_VECTOR + // shadow tables for vector index will be populated automatically during CREATE INDEX command + // so we must skip them at this step + if( sqlite3FindTable(db, VECTOR_INDEX_GLOBAL_META_TABLE, zDbMain) != NULL ){ + rc = execSqlF(db, pzErrMsg, + "SELECT'INSERT INTO vacuum_db.'||quote(name)" + "||' SELECT*FROM\"%w\".'||quote(name)" + "FROM vacuum_db.sqlite_schema " + "WHERE 
type='table'AND coalesce(rootpage,1)>0 AND name NOT IN (SELECT name||'_shadow' FROM " VECTOR_INDEX_GLOBAL_META_TABLE ")", + zDbMain + ); + }else{ + rc = execSqlF(db, pzErrMsg, + "SELECT'INSERT INTO vacuum_db.'||quote(name)" + "||' SELECT*FROM\"%w\".'||quote(name)" + "FROM vacuum_db.sqlite_schema " + "WHERE type='table'AND coalesce(rootpage,1)>0 AND name", + zDbMain + ); + } +#else /* Loop through the tables in the main database. For each, do ** an "INSERT INTO vacuum_db.xxx SELECT * FROM main.xxx;" to copy ** the contents to the temporary database. @@ -156238,6 +156264,7 @@ SQLITE_PRIVATE SQLITE_NOINLINE int sqlite3RunVacuum( "WHERE type='table'AND coalesce(rootpage,1)>0", zDbMain ); +#endif assert( (db->mDbFlags & DBFLAG_Vacuum)!=0 ); db->mDbFlags &= ~DBFLAG_Vacuum; if( rc!=SQLITE_OK ) goto end_of_vacuum; @@ -213656,11 +213683,6 @@ int vectorF64ParseSqliteBlob( ** VectorIdxParams utilities ****************************************************************************/ -// VACUUM creates tables and indices first and only then populate data -// we need to ignore inserts from 'INSERT INTO vacuum.t SELECT * FROM t' statements because -// all shadow tables will be populated by VACUUM process during regular process of table copy -#define IsVacuum(db) ((db->mDbFlags&DBFLAG_Vacuum)!=0) - void vectorIdxParamsInit(VectorIdxParams *pParams, u8 *pBinBuf, int nBinSize) { assert( nBinSize <= VECTOR_INDEX_PARAMS_BUF_SIZE ); @@ -214379,10 +214401,6 @@ int vectorIndexDrop(sqlite3 *db, const char *zDbSName, const char *zIdxName) { // this is done to prevent unrecoverable situations where index were dropped but index parameters deletion failed and second attempt will fail on first step int rcIdx, rcParams; - if( IsVacuum(db) ){ - return SQLITE_OK; - } - assert( zDbSName != NULL ); rcIdx = diskAnnDropIndex(db, zDbSName, zIdxName); @@ -214393,10 +214411,6 @@ int vectorIndexDrop(sqlite3 *db, const char *zDbSName, const char *zIdxName) { int vectorIndexClear(sqlite3 *db, const char 
*zDbSName, const char *zIdxName) { assert( zDbSName != NULL ); - if( IsVacuum(db) ){ - return SQLITE_OK; - } - return diskAnnClearIndex(db, zDbSName, zIdxName); } @@ -214406,7 +214420,7 @@ int vectorIndexClear(sqlite3 *db, const char *zDbSName, const char *zIdxName) { * this made intentionally in order to natively support upload of SQLite dumps * * dump populates tables first and create indices after - * so we must omit them because shadow tables already filled + * so we must omit index refill setp because shadow tables already filled * * 1. in case of any error :-1 returned (and pParse errMsg is populated with some error message) * 2. if vector index must not be created : 0 returned @@ -214424,10 +214438,6 @@ int vectorIndexCreate(Parse *pParse, const Index *pIdx, const char *zDbSName, co int hasLibsqlVectorIdxFn = 0, hasCollation = 0; const char *pzErrMsg; - if( IsVacuum(pParse->db) ){ - return CREATE_IGNORE; - } - assert( zDbSName != NULL ); sqlite3 *db = pParse->db; @@ -214577,7 +214587,6 @@ int vectorIndexSearch( VectorIdxParams idxParams; vectorIdxParamsInit(&idxParams, NULL, 0); - assert( !IsVacuum(db) ); assert( zDbSName != NULL ); if( argc != 3 ){ @@ -214662,10 +214671,6 @@ int vectorIndexInsert( int rc; VectorInRow vectorInRow; - if( IsVacuum(pCur->db) ){ - return SQLITE_OK; - } - rc = vectorInRowAlloc(pCur->db, pRecord, &vectorInRow, pzErrMsg); if( rc != SQLITE_OK ){ return rc; @@ -214685,10 +214690,6 @@ int vectorIndexDelete( ){ VectorInRow payload; - if( IsVacuum(pCur->db) ){ - return SQLITE_OK; - } - payload.pVector = NULL; payload.nKeys = r->nField - 1; payload.pKeyValues = r->aMem + 1; diff --git a/libsql-ffi/bundled/bindings/bindgen.rs b/libsql-ffi/bundled/bindings/bindgen.rs index e11d453281..cc73807f33 100644 --- a/libsql-ffi/bundled/bindings/bindgen.rs +++ b/libsql-ffi/bundled/bindings/bindgen.rs @@ -24,10 +24,10 @@ extern "C" { } pub const __GNUC_VA_LIST: i32 = 1; -pub const SQLITE_VERSION: &[u8; 7] = b"3.44.0\0"; -pub const 
SQLITE_VERSION_NUMBER: i32 = 3044000; +pub const SQLITE_VERSION: &[u8; 7] = b"3.45.1\0"; +pub const SQLITE_VERSION_NUMBER: i32 = 3045001; pub const SQLITE_SOURCE_ID: &[u8; 85] = - b"2023-11-01 11:23:50 17129ba1ff7f0daf37100ee82d507aef7827cf38de1866e2633096ae6ad8alt1\0"; + b"2024-01-30 16:01:20 e876e51a0ed5c5b3126f52e532044363a014bc594cfefa87ffb5b82257ccalt1\0"; pub const LIBSQL_VERSION: &[u8; 6] = b"0.2.3\0"; pub const SQLITE_OK: i32 = 0; pub const SQLITE_ERROR: i32 = 1; @@ -356,6 +356,7 @@ pub const SQLITE_DETERMINISTIC: i32 = 2048; pub const SQLITE_DIRECTONLY: i32 = 524288; pub const SQLITE_SUBTYPE: i32 = 1048576; pub const SQLITE_INNOCUOUS: i32 = 2097152; +pub const SQLITE_RESULT_SUBTYPE: i32 = 16777216; pub const SQLITE_WIN32_DATA_DIRECTORY_TYPE: i32 = 1; pub const SQLITE_WIN32_TEMP_DIRECTORY_TYPE: i32 = 2; pub const SQLITE_TXN_NONE: i32 = 0; @@ -408,6 +409,7 @@ pub const SQLITE_TESTCTRL_PENDING_BYTE: i32 = 11; pub const SQLITE_TESTCTRL_ASSERT: i32 = 12; pub const SQLITE_TESTCTRL_ALWAYS: i32 = 13; pub const SQLITE_TESTCTRL_RESERVE: i32 = 14; +pub const SQLITE_TESTCTRL_JSON_SELFCHECK: i32 = 14; pub const SQLITE_TESTCTRL_OPTIMIZATIONS: i32 = 15; pub const SQLITE_TESTCTRL_ISKEYWORD: i32 = 16; pub const SQLITE_TESTCTRL_SCRATCHMALLOC: i32 = 17; @@ -3133,6 +3135,24 @@ pub struct Fts5ExtensionApi { piCol: *mut ::std::os::raw::c_int, ), >, + pub xQueryToken: ::std::option::Option< + unsafe extern "C" fn( + arg1: *mut Fts5Context, + iPhrase: ::std::os::raw::c_int, + iToken: ::std::os::raw::c_int, + ppToken: *mut *const ::std::os::raw::c_char, + pnToken: *mut ::std::os::raw::c_int, + ) -> ::std::os::raw::c_int, + >, + pub xInstToken: ::std::option::Option< + unsafe extern "C" fn( + arg1: *mut Fts5Context, + iIdx: ::std::os::raw::c_int, + iToken: ::std::os::raw::c_int, + arg2: *mut *const ::std::os::raw::c_char, + arg3: *mut ::std::os::raw::c_int, + ) -> ::std::os::raw::c_int, + >, } #[repr(C)] #[derive(Debug, Copy, Clone)] diff --git a/libsql-ffi/bundled/src/sqlite3.c 
b/libsql-ffi/bundled/src/sqlite3.c index c22f35046f..ec692baa53 100644 --- a/libsql-ffi/bundled/src/sqlite3.c +++ b/libsql-ffi/bundled/src/sqlite3.c @@ -69,6 +69,7 @@ ** src/test2.c ** src/test3.c ** src/test8.c +** src/vacuum.c ** src/vdbe.c ** src/vdbeInt.h ** src/vdbeapi.c @@ -155950,6 +155951,10 @@ SQLITE_PRIVATE void sqlite3UpsertDoUpdate( /* #include "sqliteInt.h" */ /* #include "vdbeInt.h" */ +#ifndef SQLITE_OMIT_VECTOR +/* #include "vectorIndexInt.h" */ +#endif + #if !defined(SQLITE_OMIT_VACUUM) && !defined(SQLITE_OMIT_ATTACH) /* @@ -156227,6 +156232,27 @@ SQLITE_PRIVATE SQLITE_NOINLINE int sqlite3RunVacuum( if( rc!=SQLITE_OK ) goto end_of_vacuum; db->init.iDb = 0; +#ifndef SQLITE_OMIT_VECTOR + // shadow tables for vector index will be populated automatically during CREATE INDEX command + // so we must skip them at this step + if( sqlite3FindTable(db, VECTOR_INDEX_GLOBAL_META_TABLE, zDbMain) != NULL ){ + rc = execSqlF(db, pzErrMsg, + "SELECT'INSERT INTO vacuum_db.'||quote(name)" + "||' SELECT*FROM\"%w\".'||quote(name)" + "FROM vacuum_db.sqlite_schema " + "WHERE type='table'AND coalesce(rootpage,1)>0 AND name NOT IN (SELECT name||'_shadow' FROM " VECTOR_INDEX_GLOBAL_META_TABLE ")", + zDbMain + ); + }else{ + rc = execSqlF(db, pzErrMsg, + "SELECT'INSERT INTO vacuum_db.'||quote(name)" + "||' SELECT*FROM\"%w\".'||quote(name)" + "FROM vacuum_db.sqlite_schema " + "WHERE type='table'AND coalesce(rootpage,1)>0 AND name", + zDbMain + ); + } +#else /* Loop through the tables in the main database. For each, do ** an "INSERT INTO vacuum_db.xxx SELECT * FROM main.xxx;" to copy ** the contents to the temporary database. 
@@ -156238,6 +156264,7 @@ SQLITE_PRIVATE SQLITE_NOINLINE int sqlite3RunVacuum( "WHERE type='table'AND coalesce(rootpage,1)>0", zDbMain ); +#endif assert( (db->mDbFlags & DBFLAG_Vacuum)!=0 ); db->mDbFlags &= ~DBFLAG_Vacuum; if( rc!=SQLITE_OK ) goto end_of_vacuum; @@ -213656,11 +213683,6 @@ int vectorF64ParseSqliteBlob( ** VectorIdxParams utilities ****************************************************************************/ -// VACUUM creates tables and indices first and only then populate data -// we need to ignore inserts from 'INSERT INTO vacuum.t SELECT * FROM t' statements because -// all shadow tables will be populated by VACUUM process during regular process of table copy -#define IsVacuum(db) ((db->mDbFlags&DBFLAG_Vacuum)!=0) - void vectorIdxParamsInit(VectorIdxParams *pParams, u8 *pBinBuf, int nBinSize) { assert( nBinSize <= VECTOR_INDEX_PARAMS_BUF_SIZE ); @@ -214379,10 +214401,6 @@ int vectorIndexDrop(sqlite3 *db, const char *zDbSName, const char *zIdxName) { // this is done to prevent unrecoverable situations where index were dropped but index parameters deletion failed and second attempt will fail on first step int rcIdx, rcParams; - if( IsVacuum(db) ){ - return SQLITE_OK; - } - assert( zDbSName != NULL ); rcIdx = diskAnnDropIndex(db, zDbSName, zIdxName); @@ -214393,10 +214411,6 @@ int vectorIndexDrop(sqlite3 *db, const char *zDbSName, const char *zIdxName) { int vectorIndexClear(sqlite3 *db, const char *zDbSName, const char *zIdxName) { assert( zDbSName != NULL ); - if( IsVacuum(db) ){ - return SQLITE_OK; - } - return diskAnnClearIndex(db, zDbSName, zIdxName); } @@ -214406,7 +214420,7 @@ int vectorIndexClear(sqlite3 *db, const char *zDbSName, const char *zIdxName) { * this made intentionally in order to natively support upload of SQLite dumps * * dump populates tables first and create indices after - * so we must omit them because shadow tables already filled + * so we must omit index refill setp because shadow tables already filled * * 1. 
in case of any error :-1 returned (and pParse errMsg is populated with some error message) * 2. if vector index must not be created : 0 returned @@ -214424,10 +214438,6 @@ int vectorIndexCreate(Parse *pParse, const Index *pIdx, const char *zDbSName, co int hasLibsqlVectorIdxFn = 0, hasCollation = 0; const char *pzErrMsg; - if( IsVacuum(pParse->db) ){ - return CREATE_IGNORE; - } - assert( zDbSName != NULL ); sqlite3 *db = pParse->db; @@ -214577,7 +214587,6 @@ int vectorIndexSearch( VectorIdxParams idxParams; vectorIdxParamsInit(&idxParams, NULL, 0); - assert( !IsVacuum(db) ); assert( zDbSName != NULL ); if( argc != 3 ){ @@ -214662,10 +214671,6 @@ int vectorIndexInsert( int rc; VectorInRow vectorInRow; - if( IsVacuum(pCur->db) ){ - return SQLITE_OK; - } - rc = vectorInRowAlloc(pCur->db, pRecord, &vectorInRow, pzErrMsg); if( rc != SQLITE_OK ){ return rc; @@ -214685,10 +214690,6 @@ int vectorIndexDelete( ){ VectorInRow payload; - if( IsVacuum(pCur->db) ){ - return SQLITE_OK; - } - payload.pVector = NULL; payload.nKeys = r->nField - 1; payload.pKeyValues = r->aMem + 1; From 2115277f8c6c8ab8494dc70fa1413d30b7f362f4 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Tue, 6 Aug 2024 14:51:32 +0400 Subject: [PATCH 12/50] fix bug --- libsql-ffi/bundled/SQLite3MultipleCiphers/src/sqlite3.c | 2 +- libsql-ffi/bundled/src/sqlite3.c | 2 +- libsql-sqlite3/src/vacuum.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/libsql-ffi/bundled/SQLite3MultipleCiphers/src/sqlite3.c b/libsql-ffi/bundled/SQLite3MultipleCiphers/src/sqlite3.c index ec692baa53..c25985af8a 100644 --- a/libsql-ffi/bundled/SQLite3MultipleCiphers/src/sqlite3.c +++ b/libsql-ffi/bundled/SQLite3MultipleCiphers/src/sqlite3.c @@ -156248,7 +156248,7 @@ SQLITE_PRIVATE SQLITE_NOINLINE int sqlite3RunVacuum( "SELECT'INSERT INTO vacuum_db.'||quote(name)" "||' SELECT*FROM\"%w\".'||quote(name)" "FROM vacuum_db.sqlite_schema " - "WHERE type='table'AND coalesce(rootpage,1)>0 AND name", + "WHERE type='table'AND 
coalesce(rootpage,1)>0", zDbMain ); } diff --git a/libsql-ffi/bundled/src/sqlite3.c b/libsql-ffi/bundled/src/sqlite3.c index ec692baa53..c25985af8a 100644 --- a/libsql-ffi/bundled/src/sqlite3.c +++ b/libsql-ffi/bundled/src/sqlite3.c @@ -156248,7 +156248,7 @@ SQLITE_PRIVATE SQLITE_NOINLINE int sqlite3RunVacuum( "SELECT'INSERT INTO vacuum_db.'||quote(name)" "||' SELECT*FROM\"%w\".'||quote(name)" "FROM vacuum_db.sqlite_schema " - "WHERE type='table'AND coalesce(rootpage,1)>0 AND name", + "WHERE type='table'AND coalesce(rootpage,1)>0", zDbMain ); } diff --git a/libsql-sqlite3/src/vacuum.c b/libsql-sqlite3/src/vacuum.c index d927a8d5a6..f8e848aca6 100644 --- a/libsql-sqlite3/src/vacuum.c +++ b/libsql-sqlite3/src/vacuum.c @@ -314,7 +314,7 @@ SQLITE_NOINLINE int sqlite3RunVacuum( "SELECT'INSERT INTO vacuum_db.'||quote(name)" "||' SELECT*FROM\"%w\".'||quote(name)" "FROM vacuum_db.sqlite_schema " - "WHERE type='table'AND coalesce(rootpage,1)>0 AND name", + "WHERE type='table'AND coalesce(rootpage,1)>0", zDbMain ); } From b12431c33a172ba0c96f35cfd35e36ce931b5c00 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Tue, 6 Aug 2024 13:30:21 +0200 Subject: [PATCH 13/50] configure durable wal --- libsql-server/src/lib.rs | 98 ++++++++++++++++++++++++++++++++-------- 1 file changed, 78 insertions(+), 20 deletions(-) diff --git a/libsql-server/src/lib.rs b/libsql-server/src/lib.rs index 4188365e03..9ee8e3b908 100644 --- a/libsql-server/src/lib.rs +++ b/libsql-server/src/lib.rs @@ -55,6 +55,7 @@ use url::Url; use utils::services::idle_shutdown::IdleShutdownKicker; use self::config::MetaStoreConfig; +use self::connection::connection_manager::InnerWalManager; use self::namespace::configurator::{ BaseNamespaceConfig, NamespaceConfigurators, PrimaryConfigurator, PrimaryExtraConfig, ReplicaConfigurator, SchemaConfigurator, @@ -336,7 +337,8 @@ where config.heartbeat_url.as_deref().unwrap_or(""), config.heartbeat_period, ); - join_set.spawn({ + + self.spawn_until_shutdown_on(join_set, { let 
heartbeat_auth = config.heartbeat_auth.clone(); let heartbeat_period = config.heartbeat_period; let heartbeat_url = if let Some(url) = &config.heartbeat_url { @@ -428,7 +430,7 @@ where let (scripted_backup, script_backup_task) = ScriptBackupManager::new(&self.path, CommandHandler::new(command.to_string())) .await?; - self.spawn_until_shutdown(&mut join_set, script_backup_task.run()); + self.spawn_until_shutdown_on(&mut join_set, script_backup_task.run()); Some(scripted_backup) } None => None, @@ -484,7 +486,6 @@ where ) .await?; - self.spawn_monitoring_tasks(&mut join_set, stats_receiver)?; // if namespaces are enabled, then bottomless must have set DB ID @@ -501,7 +502,7 @@ where let proxy_service = ProxyService::new(namespace_store.clone(), None, self.disable_namespaces); // Garbage collect proxy clients every 30 seconds - self.spawn_until_shutdown(&mut join_set, { + self.spawn_until_shutdown_on(&mut join_set, { let clients = proxy_service.clients(); async move { loop { @@ -511,14 +512,17 @@ where } }); - self.spawn_until_shutdown(&mut join_set, run_rpc_server( - proxy_service, - config.acceptor, - config.tls_config, - idle_shutdown_kicker.clone(), - namespace_store.clone(), - self.disable_namespaces, - )); + self.spawn_until_shutdown_on( + &mut join_set, + run_rpc_server( + proxy_service, + config.acceptor, + config.tls_config, + idle_shutdown_kicker.clone(), + namespace_store.clone(), + self.disable_namespaces, + ), + ); } let shutdown_timeout = self.shutdown_timeout.clone(); @@ -530,7 +534,7 @@ where // The migration scheduler is only useful on the primary let meta_conn = metastore_conn_maker()?; let scheduler = Scheduler::new(namespace_store.clone(), meta_conn).await?; - self.spawn_until_shutdown(&mut join_set, async move { + self.spawn_until_shutdown_on(&mut join_set, async move { scheduler.run(scheduler_receiver).await; Ok(()) }); @@ -560,7 +564,7 @@ where ); // Garbage collect proxy clients every 30 seconds - self.spawn_until_shutdown(&mut join_set, { + 
self.spawn_until_shutdown_on(&mut join_set, { let clients = proxy_svc.clients(); async move { loop { @@ -583,8 +587,7 @@ where DatabaseKind::Replica => { dbg!(); let (channel, uri) = client_config.clone().unwrap(); - let replication_svc = - ReplicationLogProxyService::new(channel.clone(), uri.clone()); + let replication_svc = ReplicationLogProxyService::new(channel.clone(), uri.clone()); let proxy_svc = ReplicaProxyService::new( channel, uri, @@ -651,7 +654,12 @@ where match self.use_custom_wal { Some(CustomWAL::LibsqlWal) => self.libsql_wal_configurators(), #[cfg(feature = "durable-wal")] - Some(CustomWAL::DurableWal) => self.durable_wal_configurators(), + Some(CustomWAL::DurableWal) => self.durable_wal_configurators( + base_config, + scripted_backup, + migration_scheduler_handle, + client_config, + ), None => { self.legacy_configurators( base_config, @@ -669,11 +677,44 @@ where } #[cfg(feature = "durable-wal")] - fn durable_wal_configurators(&self) -> anyhow::Result { - todo!(); + fn durable_wal_configurators( + &self, + base_config: BaseNamespaceConfig, + scripted_backup: Option, + migration_scheduler_handle: SchedulerHandle, + client_config: Option<(Channel, Uri)>, + ) -> anyhow::Result { + tracing::info!("using durable wal"); + let lock_manager = Arc::new(std::sync::Mutex::new(LockManager::new())); + let namespace_resolver = |path: &Path| { + NamespaceName::from_string( + path.parent() + .unwrap() + .file_name() + .unwrap() + .to_str() + .unwrap() + .to_string(), + ) + .unwrap() + .into() + }; + let wal = DurableWalManager::new( + lock_manager, + namespace_resolver, + self.storage_server_address.clone(), + ); + let make_wal_manager = Arc::new(move || EitherWAL::C(wal.clone())); + self.configurators_common( + client_config, + base_config, + make_wal_manager, + scripted_backup, + migration_scheduler_handle, + ) } - fn spawn_until_shutdown(&self, join_set: &mut JoinSet>, fut: F) + fn spawn_until_shutdown_on(&self, join_set: &mut JoinSet>, fut: F) where F: Future> 
+ Send + 'static, { @@ -694,6 +735,23 @@ where client_config: Option<(Channel, Uri)>, ) -> anyhow::Result { let make_wal_manager = Arc::new(|| EitherWAL::A(Sqlite3WalManager::default())); + self.configurators_common( + client_config, + base_config, + make_wal_manager, + scripted_backup, + migration_scheduler_handle, + ) + } + + fn configurators_common( + &self, + client_config: Option<(Channel, Uri)>, + base_config: BaseNamespaceConfig, + make_wal_manager: Arc InnerWalManager + Sync + Send + 'static>, + scripted_backup: Option, + migration_scheduler_handle: SchedulerHandle, + ) -> anyhow::Result { let mut configurators = NamespaceConfigurators::empty(); match client_config { From 066f1527572d3e3012457e588e9d28e8e74656ec Mon Sep 17 00:00:00 2001 From: ad hoc Date: Tue, 6 Aug 2024 14:37:10 +0200 Subject: [PATCH 14/50] configure libsql_wal --- libsql-server/src/lib.rs | 213 ++++++++++++++++++++++++++++++--------- 1 file changed, 165 insertions(+), 48 deletions(-) diff --git a/libsql-server/src/lib.rs b/libsql-server/src/lib.rs index 9ee8e3b908..f5788dcebb 100644 --- a/libsql-server/src/lib.rs +++ b/libsql-server/src/lib.rs @@ -28,6 +28,7 @@ use auth::Auth; use config::{ AdminApiConfig, DbConfig, HeartbeatConfig, RpcClientConfig, RpcServerConfig, UserApiConfig, }; +use futures::future::{pending, ready}; use futures::Future; use http::user::UserApi; use hyper::client::HttpConnector; @@ -40,6 +41,10 @@ use libsql_sys::wal::either::Either as EitherWAL; #[cfg(feature = "durable-wal")] use libsql_sys::wal::either::Either3 as EitherWAL; use libsql_sys::wal::Sqlite3WalManager; +use libsql_wal::checkpointer::LibsqlCheckpointer; +use libsql_wal::registry::WalRegistry; +use libsql_wal::storage::NoStorage; +use libsql_wal::wal::LibsqlWalManager; use namespace::meta_store::MetaStoreHandle; use namespace::NamespaceName; use net::Connector; @@ -458,9 +463,10 @@ where let configurators = self .make_configurators( base_config, - scripted_backup, - scheduler_sender.into(), 
client_config.clone(), + &mut join_set, + scheduler_sender.into(), + scripted_backup, ) .await?; @@ -596,7 +602,6 @@ where self.disable_namespaces, ); - dbg!(); self.make_services( namespace_store.clone(), idle_shutdown_kicker, @@ -647,42 +652,125 @@ where async fn make_configurators( &self, base_config: BaseNamespaceConfig, - scripted_backup: Option, - migration_scheduler_handle: SchedulerHandle, client_config: Option<(Channel, Uri)>, + join_set: &mut JoinSet>, + migration_scheduler_handle: SchedulerHandle, + scripted_backup: Option, ) -> anyhow::Result { + let wal_path = base_config.base_path.join("wals"); + let enable_libsql_wal_test = { + let is_primary = self.rpc_server_config.is_some(); + let is_libsql_wal_test = std::env::var("LIBSQL_WAL_TEST").is_ok(); + is_primary && is_libsql_wal_test + }; + let use_libsql_wal = + self.use_custom_wal == Some(CustomWAL::LibsqlWal) || enable_libsql_wal_test; + if !use_libsql_wal { + if wal_path.try_exists()? { + anyhow::bail!("database was previously setup to use libsql-wal"); + } + } + + if self.use_custom_wal.is_some() { + if self.db_config.bottomless_replication.is_some() { + anyhow::bail!("bottomless not supported with custom WAL"); + } + if self.rpc_client_config.is_some() { + anyhow::bail!("custom WAL not supported in replica mode"); + } + } + match self.use_custom_wal { - Some(CustomWAL::LibsqlWal) => self.libsql_wal_configurators(), + Some(CustomWAL::LibsqlWal) => self.libsql_wal_configurators( + base_config, + client_config, + join_set, + migration_scheduler_handle, + scripted_backup, + wal_path, + ), #[cfg(feature = "durable-wal")] Some(CustomWAL::DurableWal) => self.durable_wal_configurators( base_config, - scripted_backup, - migration_scheduler_handle, client_config, + migration_scheduler_handle, + scripted_backup, ), None => { self.legacy_configurators( base_config, - scripted_backup, - migration_scheduler_handle, client_config, + migration_scheduler_handle, + scripted_backup, ) .await } } } - fn 
libsql_wal_configurators(&self) -> anyhow::Result { - todo!() + fn libsql_wal_configurators( + &self, + base_config: BaseNamespaceConfig, + client_config: Option<(Channel, Uri)>, + join_set: &mut JoinSet>, + migration_scheduler_handle: SchedulerHandle, + scripted_backup: Option, + wal_path: PathBuf, + ) -> anyhow::Result { + tracing::info!("using libsql wal"); + let (sender, receiver) = tokio::sync::mpsc::channel(64); + let registry = Arc::new(WalRegistry::new(wal_path, NoStorage, sender)?); + let checkpointer = LibsqlCheckpointer::new(registry.clone(), receiver, 8); + self.spawn_until_shutdown_on(join_set, async move { + checkpointer.run().await; + Ok(()) + }); + + let namespace_resolver = |path: &Path| { + NamespaceName::from_string( + path.parent() + .unwrap() + .file_name() + .unwrap() + .to_str() + .unwrap() + .to_string(), + ) + .unwrap() + .into() + }; + let wal = LibsqlWalManager::new(registry.clone(), Arc::new(namespace_resolver)); + + self.spawn_until_shutdown_with_teardown(join_set, pending(), async move { + registry.shutdown().await?; + Ok(()) + }); + + let make_wal_manager = Arc::new(move || EitherWAL::B(wal.clone())); + let mut configurators = NamespaceConfigurators::empty(); + + match client_config { + Some(_) => todo!("configure replica"), + // configure primary + None => self.configure_primary_common( + base_config, + &mut configurators, + make_wal_manager, + migration_scheduler_handle, + scripted_backup, + ), + } + + Ok(configurators) } #[cfg(feature = "durable-wal")] fn durable_wal_configurators( &self, base_config: BaseNamespaceConfig, - scripted_backup: Option, - migration_scheduler_handle: SchedulerHandle, client_config: Option<(Channel, Uri)>, + migration_scheduler_handle: SchedulerHandle, + scripted_backup: Option, ) -> anyhow::Result { tracing::info!("using durable wal"); let lock_manager = Arc::new(std::sync::Mutex::new(LockManager::new())); @@ -706,22 +794,37 @@ where ); let make_wal_manager = Arc::new(move || EitherWAL::C(wal.clone())); 
self.configurators_common( - client_config, base_config, + client_config, make_wal_manager, - scripted_backup, migration_scheduler_handle, + scripted_backup, ) } fn spawn_until_shutdown_on(&self, join_set: &mut JoinSet>, fut: F) where F: Future> + Send + 'static, + { + self.spawn_until_shutdown_with_teardown(join_set, fut, ready(Ok(()))) + } + + /// run the passed future until shutdown is called, then call the passed teardown future + fn spawn_until_shutdown_with_teardown( + &self, + join_set: &mut JoinSet>, + fut: F, + teardown: T, + ) where + F: Future> + Send + 'static, + T: Future> + Send + 'static, { let shutdown = self.shutdown.clone(); join_set.spawn(async move { tokio::select! { - _ = shutdown.notified() => Ok(()), + _ = shutdown.notified() => { + teardown.await + }, ret = fut => ret } }); @@ -730,30 +833,29 @@ where async fn legacy_configurators( &self, base_config: BaseNamespaceConfig, - scripted_backup: Option, - migration_scheduler_handle: SchedulerHandle, client_config: Option<(Channel, Uri)>, + migration_scheduler_handle: SchedulerHandle, + scripted_backup: Option, ) -> anyhow::Result { let make_wal_manager = Arc::new(|| EitherWAL::A(Sqlite3WalManager::default())); self.configurators_common( - client_config, base_config, + client_config, make_wal_manager, - scripted_backup, migration_scheduler_handle, + scripted_backup, ) } fn configurators_common( &self, - client_config: Option<(Channel, Uri)>, base_config: BaseNamespaceConfig, + client_config: Option<(Channel, Uri)>, make_wal_manager: Arc InnerWalManager + Sync + Send + 'static>, - scripted_backup: Option, migration_scheduler_handle: SchedulerHandle, + scripted_backup: Option, ) -> anyhow::Result { let mut configurators = NamespaceConfigurators::empty(); - match client_config { // replica mode Some((channel, uri)) => { @@ -762,34 +864,49 @@ where configurators.with_replica(replica_configurator); } // primary mode - None => { - let primary_config = PrimaryExtraConfig { - max_log_size: 
self.db_config.max_log_size, - max_log_duration: self.db_config.max_log_duration.map(Duration::from_secs_f32), - bottomless_replication: self.db_config.bottomless_replication.clone(), - scripted_backup, - checkpoint_interval: self.db_config.checkpoint_interval, - }; + None => self.configure_primary_common( + base_config, + &mut configurators, + make_wal_manager, + migration_scheduler_handle, + scripted_backup, + ), + } - let primary_configurator = PrimaryConfigurator::new( - base_config.clone(), - primary_config.clone(), - make_wal_manager.clone(), - ); + Ok(configurators) + } - let schema_configurator = SchemaConfigurator::new( - base_config.clone(), - primary_config, - make_wal_manager.clone(), - migration_scheduler_handle, - ); + fn configure_primary_common( + &self, + base_config: BaseNamespaceConfig, + configurators: &mut NamespaceConfigurators, + make_wal_manager: Arc InnerWalManager + Sync + Send + 'static>, + migration_scheduler_handle: SchedulerHandle, + scripted_backup: Option, + ) { + let primary_config = PrimaryExtraConfig { + max_log_size: self.db_config.max_log_size, + max_log_duration: self.db_config.max_log_duration.map(Duration::from_secs_f32), + bottomless_replication: self.db_config.bottomless_replication.clone(), + scripted_backup, + checkpoint_interval: self.db_config.checkpoint_interval, + }; - configurators.with_schema(schema_configurator); - configurators.with_primary(primary_configurator); - } - } + let primary_configurator = PrimaryConfigurator::new( + base_config.clone(), + primary_config.clone(), + make_wal_manager.clone(), + ); - Ok(configurators) + let schema_configurator = SchemaConfigurator::new( + base_config.clone(), + primary_config, + make_wal_manager.clone(), + migration_scheduler_handle, + ); + + configurators.with_schema(schema_configurator); + configurators.with_primary(primary_configurator); } fn setup_shutdown(&self) -> Option { From b5dba7241531d3c5bb58a093189b72b9ea47fb25 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Tue, 6 
Aug 2024 16:52:23 +0200 Subject: [PATCH 15/50] partial implmentation of LibsqlWalReplicationConfigurator --- .../configurator/libsql_wal_replica.rs | 138 ++++++++++++++++++ 1 file changed, 138 insertions(+) create mode 100644 libsql-server/src/namespace/configurator/libsql_wal_replica.rs diff --git a/libsql-server/src/namespace/configurator/libsql_wal_replica.rs b/libsql-server/src/namespace/configurator/libsql_wal_replica.rs new file mode 100644 index 0000000000..6b2519cf33 --- /dev/null +++ b/libsql-server/src/namespace/configurator/libsql_wal_replica.rs @@ -0,0 +1,138 @@ +use std::pin::Pin; +use std::future::Future; +use std::sync::Arc; + +use chrono::prelude::NaiveDateTime; +use hyper::Uri; +use libsql_replication::rpc::replication::replication_log_client::ReplicationLogClient; +use libsql_wal::io::StdIO; +use libsql_wal::registry::WalRegistry; +use libsql_wal::storage::NoStorage; +use tokio::task::JoinSet; +use tonic::transport::Channel; + +use crate::connection::config::DatabaseConfig; +use crate::connection::connection_manager::InnerWalManager; +use crate::connection::write_proxy::MakeWriteProxyConn; +use crate::connection::MakeConnection; +use crate::database::{Database, ReplicaDatabase}; +use crate::namespace::broadcasters::BroadcasterHandle; +use crate::namespace::configurator::helpers::make_stats; +use crate::namespace::meta_store::MetaStoreHandle; +use crate::namespace::{ + Namespace, NamespaceBottomlessDbIdInit, NamespaceName, NamespaceStore, ResetCb, + ResolveNamespacePathFn, RestoreOption, +}; +use crate::DEFAULT_AUTO_CHECKPOINT; + +use super::{BaseNamespaceConfig, ConfigureNamespace}; + +pub struct LibsqlWalReplicaConfigurator { + base: BaseNamespaceConfig, + registry: Arc>, + uri: Uri, + channel: Channel, + make_wal_manager: Arc InnerWalManager + Sync + Send + 'static>, +} + +impl ConfigureNamespace for LibsqlWalReplicaConfigurator { + fn setup<'a>( + &'a self, + db_config: MetaStoreHandle, + restore_option: RestoreOption, + name: &'a 
NamespaceName, + reset: ResetCb, + resolve_attach_path: ResolveNamespacePathFn, + store: NamespaceStore, + broadcaster: BroadcasterHandle, + ) -> Pin> + Send + 'a>> { + Box::pin(async move { + tracing::debug!("creating replica namespace"); + let db_path = self.base.base_path.join("dbs").join(name.as_str()); + let channel = self.channel.clone(); + let uri = self.uri.clone(); + + let rpc_client = ReplicationLogClient::with_origin(channel.clone(), uri.clone()); + // TODO! setup replication + + let mut join_set = JoinSet::new(); + let namespace = name.clone(); + + let stats = make_stats( + &db_path, + &mut join_set, + db_config.clone(), + self.base.stats_sender.clone(), + name.clone(), + applied_frame_no_receiver.clone(), + ) + .await?; + + let connection_maker = MakeWriteProxyConn::new( + db_path.clone(), + self.base.extensions.clone(), + channel.clone(), + uri.clone(), + stats.clone(), + broadcaster, + db_config.clone(), + applied_frame_no_receiver, + self.base.max_response_size, + self.base.max_total_response_size, + primary_current_replication_index, + None, + resolve_attach_path, + self.make_wal_manager.clone(), + ) + .await? + .throttled( + self.base.max_concurrent_connections.clone(), + Some(DB_CREATE_TIMEOUT), + self.base.max_total_response_size, + self.base.max_concurrent_requests, + ); + + Ok(Namespace { + tasks: join_set, + db: Database::Replica(ReplicaDatabase { + connection_maker: Arc::new(connection_maker), + }), + name: name.clone(), + stats, + db_config_store: db_config, + path: db_path.into(), + }) + }) + } + + fn cleanup<'a>( + &'a self, + namespace: &'a NamespaceName, + _db_config: &DatabaseConfig, + _prune_all: bool, + _bottomless_db_id_init: NamespaceBottomlessDbIdInit, + ) -> Pin> + Send + 'a>> { + Box::pin(async move { + let ns_path = self.base.base_path.join("dbs").join(namespace.as_str()); + if ns_path.try_exists()? 
{ + tracing::debug!("removing database directory: {}", ns_path.display()); + tokio::fs::remove_dir_all(ns_path).await?; + } + Ok(()) + }) + } + + fn fork<'a>( + &'a self, + _from_ns: &'a Namespace, + _from_config: MetaStoreHandle, + _to_ns: NamespaceName, + _to_config: MetaStoreHandle, + _timestamp: Option, + _store: NamespaceStore, + ) -> Pin> + Send + 'a>> { + Box::pin(std::future::ready(Err(crate::Error::Fork( + super::fork::ForkError::ForkReplica, + )))) + } +} From ded5ba7f859f6f6b94f1a1b6614e31521a591f01 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Tue, 6 Aug 2024 16:54:14 +0200 Subject: [PATCH 16/50] fmt + remove dbgs --- libsql-server/src/http/admin/stats.rs | 2 - libsql-server/src/lib.rs | 2 - .../src/namespace/configurator/fork.rs | 7 ++- .../src/namespace/configurator/helpers.rs | 60 +++++++++--------- .../configurator/libsql_wal_replica.rs | 18 +++--- .../src/namespace/configurator/mod.rs | 18 ++++-- .../src/namespace/configurator/primary.rs | 13 ++-- .../src/namespace/configurator/schema.rs | 29 ++++++--- libsql-server/src/namespace/mod.rs | 2 +- libsql-server/src/namespace/store.rs | 13 +--- libsql-server/src/schema/scheduler.rs | 63 ++++++------------- libsql-server/tests/cluster/mod.rs | 29 +++------ 12 files changed, 111 insertions(+), 145 deletions(-) diff --git a/libsql-server/src/http/admin/stats.rs b/libsql-server/src/http/admin/stats.rs index 5fce92ba0a..f2948d4d7b 100644 --- a/libsql-server/src/http/admin/stats.rs +++ b/libsql-server/src/http/admin/stats.rs @@ -140,12 +140,10 @@ pub(super) async fn handle_stats( State(app_state): State>>, Path(namespace): Path, ) -> crate::Result> { - dbg!(); let stats = app_state .namespaces .stats(NamespaceName::from_string(namespace)?) 
.await?; - dbg!(); let resp: StatsResponse = stats.as_ref().into(); Ok(Json(resp)) diff --git a/libsql-server/src/lib.rs b/libsql-server/src/lib.rs index f5788dcebb..d26921dd00 100644 --- a/libsql-server/src/lib.rs +++ b/libsql-server/src/lib.rs @@ -591,7 +591,6 @@ where .configure(&mut join_set); } DatabaseKind::Replica => { - dbg!(); let (channel, uri) = client_config.clone().unwrap(); let replication_svc = ReplicationLogProxyService::new(channel.clone(), uri.clone()); let proxy_svc = ReplicaProxyService::new( @@ -611,7 +610,6 @@ where service_shutdown.clone(), ) .configure(&mut join_set); - dbg!(); } }; diff --git a/libsql-server/src/namespace/configurator/fork.rs b/libsql-server/src/namespace/configurator/fork.rs index 26a0b99b61..03f2ac03d8 100644 --- a/libsql-server/src/namespace/configurator/fork.rs +++ b/libsql-server/src/namespace/configurator/fork.rs @@ -58,7 +58,7 @@ pub(super) async fn fork( Database::Schema(db) => db.wal_wrapper.wrapper().logger(), _ => { return Err(crate::Error::Fork(ForkError::Internal(anyhow::Error::msg( - "Invalid source database type for fork", + "Invalid source database type for fork", )))); } }; @@ -114,7 +114,7 @@ pub struct ForkTask { pub to_namespace: NamespaceName, pub to_config: MetaStoreHandle, pub restore_to: Option, - pub store: NamespaceStore + pub store: NamespaceStore, } pub struct PointInTimeRestore { @@ -156,7 +156,8 @@ impl ForkTask { let dest_path = self.base_path.join("dbs").join(self.to_namespace.as_str()); tokio::fs::rename(temp_dir.path(), dest_path).await?; - self.store.make_namespace(&self.to_namespace, self.to_config, RestoreOption::Latest) + self.store + .make_namespace(&self.to_namespace, self.to_config, RestoreOption::Latest) .await .map_err(|e| ForkError::CreateNamespace(Box::new(e))) } diff --git a/libsql-server/src/namespace/configurator/helpers.rs b/libsql-server/src/namespace/configurator/helpers.rs index f43fa8a192..a5a4c5121d 100644 --- a/libsql-server/src/namespace/configurator/helpers.rs +++ 
b/libsql-server/src/namespace/configurator/helpers.rs @@ -6,26 +6,29 @@ use std::time::Duration; use anyhow::Context as _; use bottomless::replicator::Options; use bytes::Bytes; +use enclose::enclose; use futures::Stream; use libsql_sys::wal::Sqlite3WalManager; use tokio::io::AsyncBufReadExt as _; use tokio::sync::watch; use tokio::task::JoinSet; use tokio_util::io::StreamReader; -use enclose::enclose; use crate::connection::config::DatabaseConfig; use crate::connection::connection_manager::InnerWalManager; use crate::connection::libsql::{open_conn, MakeLibSqlConn}; use crate::connection::{Connection as _, MakeConnection as _}; +use crate::database::{PrimaryConnection, PrimaryConnectionMaker}; use crate::error::LoadDumpError; +use crate::namespace::broadcasters::BroadcasterHandle; +use crate::namespace::meta_store::MetaStoreHandle; +use crate::namespace::replication_wal::{make_replication_wal_wrapper, ReplicationWalWrapper}; +use crate::namespace::{ + NamespaceBottomlessDbId, NamespaceBottomlessDbIdInit, NamespaceName, ResolveNamespacePathFn, + RestoreOption, +}; use crate::replication::{FrameNo, ReplicationLogger}; use crate::stats::Stats; -use crate::namespace::{NamespaceBottomlessDbId, NamespaceBottomlessDbIdInit, NamespaceName, ResolveNamespacePathFn, RestoreOption}; -use crate::namespace::replication_wal::{make_replication_wal_wrapper, ReplicationWalWrapper}; -use crate::namespace::meta_store::MetaStoreHandle; -use crate::namespace::broadcasters::BroadcasterHandle; -use crate::database::{PrimaryConnection, PrimaryConnectionMaker}; use crate::{StatsSender, BLOCKING_RT, DB_CREATE_TIMEOUT, DEFAULT_AUTO_CHECKPOINT}; use super::{BaseNamespaceConfig, PrimaryExtraConfig}; @@ -74,8 +77,7 @@ pub(super) async fn make_primary_connection_maker( tracing::debug!("Checkpointed before initializing bottomless"); let options = make_bottomless_options(options, bottomless_db_id, name.clone()); let (replicator, did_recover) = - init_bottomless_replicator(db_path.join("data"), 
options, &restore_option) - .await?; + init_bottomless_replicator(db_path.join("data"), options, &restore_option).await?; tracing::debug!("Completed init of bottomless replicator"); is_dirty |= did_recover; Some(replicator) @@ -93,14 +95,14 @@ pub(super) async fn make_primary_connection_maker( }; let logger = Arc::new(ReplicationLogger::open( - &db_path, - primary_config.max_log_size, - primary_config.max_log_duration, - is_dirty, - auto_checkpoint, - primary_config.scripted_backup.clone(), - name.clone(), - None, + &db_path, + primary_config.max_log_size, + primary_config.max_log_duration, + is_dirty, + auto_checkpoint, + primary_config.scripted_backup.clone(), + name.clone(), + None, )?); tracing::debug!("sending stats"); @@ -113,7 +115,7 @@ pub(super) async fn make_primary_connection_maker( name.clone(), logger.new_frame_notifier.subscribe(), ) - .await?; + .await?; tracing::debug!("Making replication wal wrapper"); let wal_wrapper = make_replication_wal_wrapper(bottomless_replicator, logger.clone()); @@ -136,13 +138,13 @@ pub(super) async fn make_primary_connection_maker( resolve_attach_path, make_wal_manager.clone(), ) - .await? - .throttled( - base_config.max_concurrent_connections.clone(), - Some(DB_CREATE_TIMEOUT), - base_config.max_total_response_size, - base_config.max_concurrent_requests, - ); + .await? + .throttled( + base_config.max_concurrent_connections.clone(), + Some(DB_CREATE_TIMEOUT), + base_config.max_total_response_size, + base_config.max_concurrent_requests, + ); tracing::debug!("Completed opening libsql connection"); @@ -356,10 +358,7 @@ pub(super) async fn make_stats( } }); - join_set.spawn(run_storage_monitor( - db_path.into(), - Arc::downgrade(&stats), - )); + join_set.spawn(run_storage_monitor(db_path.into(), Arc::downgrade(&stats))); tracing::debug!("done sending stats, and creating bg tasks"); @@ -369,10 +368,7 @@ pub(super) async fn make_stats( // Periodically check the storage used by the database and save it in the Stats structure. 
// TODO: Once we have a separate fiber that does WAL checkpoints, running this routine // right after checkpointing is exactly where it should be done. -async fn run_storage_monitor( - db_path: PathBuf, - stats: Weak, -) -> anyhow::Result<()> { +async fn run_storage_monitor(db_path: PathBuf, stats: Weak) -> anyhow::Result<()> { // on initialization, the database file doesn't exist yet, so we wait a bit for it to be // created tokio::time::sleep(Duration::from_secs(1)).await; diff --git a/libsql-server/src/namespace/configurator/libsql_wal_replica.rs b/libsql-server/src/namespace/configurator/libsql_wal_replica.rs index 6b2519cf33..f26738ec2a 100644 --- a/libsql-server/src/namespace/configurator/libsql_wal_replica.rs +++ b/libsql-server/src/namespace/configurator/libsql_wal_replica.rs @@ -1,5 +1,5 @@ -use std::pin::Pin; use std::future::Future; +use std::pin::Pin; use std::sync::Arc; use chrono::prelude::NaiveDateTime; @@ -66,7 +66,7 @@ impl ConfigureNamespace for LibsqlWalReplicaConfigurator { name.clone(), applied_frame_no_receiver.clone(), ) - .await?; + .await?; let connection_maker = MakeWriteProxyConn::new( db_path.clone(), @@ -84,13 +84,13 @@ impl ConfigureNamespace for LibsqlWalReplicaConfigurator { resolve_attach_path, self.make_wal_manager.clone(), ) - .await? - .throttled( - self.base.max_concurrent_connections.clone(), - Some(DB_CREATE_TIMEOUT), - self.base.max_total_response_size, - self.base.max_concurrent_requests, - ); + .await? 
+ .throttled( + self.base.max_concurrent_connections.clone(), + Some(DB_CREATE_TIMEOUT), + self.base.max_total_response_size, + self.base.max_concurrent_requests, + ); Ok(Namespace { tasks: join_set, diff --git a/libsql-server/src/namespace/configurator/mod.rs b/libsql-server/src/namespace/configurator/mod.rs index e5db335ff6..9122fc18de 100644 --- a/libsql-server/src/namespace/configurator/mod.rs +++ b/libsql-server/src/namespace/configurator/mod.rs @@ -13,13 +13,17 @@ use crate::StatsSender; use super::broadcasters::BroadcasterHandle; use super::meta_store::MetaStoreHandle; -use super::{Namespace, NamespaceBottomlessDbIdInit, NamespaceName, NamespaceStore, ResetCb, ResolveNamespacePathFn, RestoreOption}; +use super::{ + Namespace, NamespaceBottomlessDbIdInit, NamespaceName, NamespaceStore, ResetCb, + ResolveNamespacePathFn, RestoreOption, +}; +pub mod fork; mod helpers; +mod libsql_wal_replica; mod primary; mod replica; mod schema; -pub mod fork; pub use primary::PrimaryConfigurator; pub use replica::ReplicaConfigurator; @@ -68,12 +72,18 @@ impl NamespaceConfigurators { } } - pub fn with_primary(&mut self, c: impl ConfigureNamespace + Send + Sync + 'static) -> &mut Self { + pub fn with_primary( + &mut self, + c: impl ConfigureNamespace + Send + Sync + 'static, + ) -> &mut Self { self.primary_configurator = Some(Box::new(c)); self } - pub fn with_replica(&mut self, c: impl ConfigureNamespace + Send + Sync + 'static) -> &mut Self { + pub fn with_replica( + &mut self, + c: impl ConfigureNamespace + Send + Sync + 'static, + ) -> &mut Self { self.replica_configurator = Some(Box::new(c)); self } diff --git a/libsql-server/src/namespace/configurator/primary.rs b/libsql-server/src/namespace/configurator/primary.rs index 4351f6a3ac..6c245a6e8f 100644 --- a/libsql-server/src/namespace/configurator/primary.rs +++ b/libsql-server/src/namespace/configurator/primary.rs @@ -12,8 +12,8 @@ use crate::namespace::broadcasters::BroadcasterHandle; use 
crate::namespace::configurator::helpers::make_primary_connection_maker; use crate::namespace::meta_store::MetaStoreHandle; use crate::namespace::{ - Namespace, NamespaceBottomlessDbIdInit, NamespaceName, NamespaceStore, - ResetCb, ResolveNamespacePathFn, RestoreOption, + Namespace, NamespaceBottomlessDbIdInit, NamespaceName, NamespaceStore, ResetCb, + ResolveNamespacePathFn, RestoreOption, }; use crate::run_periodic_checkpoint; use crate::schema::{has_pending_migration_task, setup_migration_table}; @@ -168,7 +168,8 @@ impl ConfigureNamespace for PrimaryConfigurator { db_config, prune_all, bottomless_db_id_init, - ).await + ) + .await }) } @@ -186,10 +187,10 @@ impl ConfigureNamespace for PrimaryConfigurator { from_config, to_ns, to_config, - timestamp, + timestamp, store, &self.primary_config, - self.base.base_path.clone())) + self.base.base_path.clone(), + )) } } - diff --git a/libsql-server/src/namespace/configurator/schema.rs b/libsql-server/src/namespace/configurator/schema.rs index e55c706fec..98e679513a 100644 --- a/libsql-server/src/namespace/configurator/schema.rs +++ b/libsql-server/src/namespace/configurator/schema.rs @@ -6,12 +6,11 @@ use tokio::task::JoinSet; use crate::connection::config::DatabaseConfig; use crate::connection::connection_manager::InnerWalManager; use crate::database::{Database, SchemaDatabase}; +use crate::namespace::broadcasters::BroadcasterHandle; use crate::namespace::meta_store::MetaStoreHandle; use crate::namespace::{ - Namespace, NamespaceName, NamespaceStore, - ResetCb, ResolveNamespacePathFn, RestoreOption, + Namespace, NamespaceName, NamespaceStore, ResetCb, ResolveNamespacePathFn, RestoreOption, }; -use crate::namespace::broadcasters::BroadcasterHandle; use crate::schema::SchedulerHandle; use super::helpers::{cleanup_primary, make_primary_connection_maker}; @@ -25,8 +24,18 @@ pub struct SchemaConfigurator { } impl SchemaConfigurator { - pub fn new(base: BaseNamespaceConfig, primary_config: PrimaryExtraConfig, 
make_wal_manager: Arc InnerWalManager + Sync + Send + 'static>, migration_scheduler: SchedulerHandle) -> Self { - Self { base, primary_config, make_wal_manager, migration_scheduler } + pub fn new( + base: BaseNamespaceConfig, + primary_config: PrimaryExtraConfig, + make_wal_manager: Arc InnerWalManager + Sync + Send + 'static>, + migration_scheduler: SchedulerHandle, + ) -> Self { + Self { + base, + primary_config, + make_wal_manager, + migration_scheduler, + } } } @@ -58,7 +67,7 @@ impl ConfigureNamespace for SchemaConfigurator { &mut join_set, resolve_attach_path, broadcaster, - self.make_wal_manager.clone() + self.make_wal_manager.clone(), ) .await?; @@ -94,7 +103,8 @@ impl ConfigureNamespace for SchemaConfigurator { db_config, prune_all, bottomless_db_id_init, - ).await + ) + .await }) } @@ -112,9 +122,10 @@ impl ConfigureNamespace for SchemaConfigurator { from_config, to_ns, to_config, - timestamp, + timestamp, store, &self.primary_config, - self.base.base_path.clone())) + self.base.base_path.clone(), + )) } } diff --git a/libsql-server/src/namespace/mod.rs b/libsql-server/src/namespace/mod.rs index 7cfa6b351c..2a2e3eb211 100644 --- a/libsql-server/src/namespace/mod.rs +++ b/libsql-server/src/namespace/mod.rs @@ -19,12 +19,12 @@ pub use self::name::NamespaceName; pub use self::store::NamespaceStore; pub mod broadcasters; +pub(crate) mod configurator; pub mod meta_store; mod name; pub mod replication_wal; mod schema_lock; mod store; -pub(crate) mod configurator; pub type ResetCb = Box; pub type ResolveNamespacePathFn = diff --git a/libsql-server/src/namespace/store.rs b/libsql-server/src/namespace/store.rs index b2b5d33032..a78e4f59b0 100644 --- a/libsql-server/src/namespace/store.rs +++ b/libsql-server/src/namespace/store.rs @@ -327,7 +327,6 @@ impl NamespaceStore { where Fun: FnOnce(&Namespace) -> R, { - dbg!(); if namespace != NamespaceName::default() && !self.inner.metadata.exists(&namespace) && !self.inner.allow_lazy_creation @@ -335,7 +334,6 @@ impl 
NamespaceStore { return Err(Error::NamespaceDoesntExist(namespace.to_string())); } - dbg!(); let f = { let name = namespace.clone(); move |ns: NamespaceEntry| async move { @@ -348,9 +346,7 @@ impl NamespaceStore { } }; - dbg!(); let handle = self.inner.metadata.handle(namespace.to_owned()); - dbg!(); f(self .load_namespace(&namespace, handle, RestoreOption::Latest) .await?) @@ -377,7 +373,6 @@ impl NamespaceStore { config: MetaStoreHandle, restore_option: RestoreOption, ) -> crate::Result { - dbg!(); let ns = self .get_configurator(&config.get()) .setup( @@ -391,7 +386,6 @@ impl NamespaceStore { ) .await?; - dbg!(); Ok(ns) } @@ -401,17 +395,13 @@ impl NamespaceStore { db_config: MetaStoreHandle, restore_option: RestoreOption, ) -> crate::Result { - dbg!(); let init = async { - dbg!(); let ns = self .make_namespace(namespace, db_config, restore_option) .await?; - dbg!(); Ok(Some(ns)) }; - dbg!(); let before_load = Instant::now(); let ns = self .inner @@ -420,8 +410,7 @@ impl NamespaceStore { namespace.clone(), init.map_ok(|ns| Arc::new(RwLock::new(ns))), ) - .await.map_err(|e| dbg!(e))?; - dbg!(); + .await?; NAMESPACE_LOAD_LATENCY.record(before_load.elapsed()); Ok(ns) diff --git a/libsql-server/src/schema/scheduler.rs b/libsql-server/src/schema/scheduler.rs index a8195cbbd0..57916bb9a5 100644 --- a/libsql-server/src/schema/scheduler.rs +++ b/libsql-server/src/schema/scheduler.rs @@ -830,16 +830,10 @@ mod test { .unwrap(); let (sender, mut receiver) = mpsc::channel(100); let config = make_config(sender.clone().into(), tmp.path()); - let store = NamespaceStore::new( - false, - false, - 10, - meta_store, - config, - DatabaseKind::Primary - ) - .await - .unwrap(); + let store = + NamespaceStore::new(false, false, 10, meta_store, config, DatabaseKind::Primary) + .await + .unwrap(); let mut scheduler = Scheduler::new(store.clone(), maker().unwrap()) .await .unwrap(); @@ -961,16 +955,10 @@ mod test { .unwrap(); let (sender, mut receiver) = mpsc::channel(100); let config = 
make_config(sender.clone().into(), tmp.path()); - let store = NamespaceStore::new( - false, - false, - 10, - meta_store, - config, - DatabaseKind::Primary - ) - .await - .unwrap(); + let store = + NamespaceStore::new(false, false, 10, meta_store, config, DatabaseKind::Primary) + .await + .unwrap(); let mut scheduler = Scheduler::new(store.clone(), maker().unwrap()) .await .unwrap(); @@ -1044,16 +1032,10 @@ mod test { .unwrap(); let (sender, _receiver) = mpsc::channel(100); let config = make_config(sender.clone().into(), tmp.path()); - let store = NamespaceStore::new( - false, - false, - 10, - meta_store, - config, - DatabaseKind::Primary, - ) - .await - .unwrap(); + let store = + NamespaceStore::new(false, false, 10, meta_store, config, DatabaseKind::Primary) + .await + .unwrap(); store .with("ns".into(), |ns| { @@ -1078,9 +1060,10 @@ mod test { .unwrap(); let (sender, mut receiver) = mpsc::channel(100); let config = make_config(sender.clone().into(), tmp.path()); - let store = NamespaceStore::new(false, false, 10, meta_store, config, DatabaseKind::Primary) - .await - .unwrap(); + let store = + NamespaceStore::new(false, false, 10, meta_store, config, DatabaseKind::Primary) + .await + .unwrap(); let mut scheduler = Scheduler::new(store.clone(), maker().unwrap()) .await .unwrap(); @@ -1151,16 +1134,10 @@ mod test { .unwrap(); let (sender, _receiver) = mpsc::channel(100); let config = make_config(sender.clone().into(), tmp.path()); - let store = NamespaceStore::new( - false, - false, - 10, - meta_store, - config, - DatabaseKind::Primary - ) - .await - .unwrap(); + let store = + NamespaceStore::new(false, false, 10, meta_store, config, DatabaseKind::Primary) + .await + .unwrap(); let scheduler = Scheduler::new(store.clone(), maker().unwrap()) .await .unwrap(); diff --git a/libsql-server/tests/cluster/mod.rs b/libsql-server/tests/cluster/mod.rs index 8f214bd05e..1171d4a5d0 100644 --- a/libsql-server/tests/cluster/mod.rs +++ b/libsql-server/tests/cluster/mod.rs @@ 
-149,29 +149,23 @@ fn sync_many_replica() { let mut sim = Builder::new() .simulation_duration(Duration::from_secs(1000)) .build(); - dbg!(); make_cluster(&mut sim, NUM_REPLICA, true); - dbg!(); sim.client("client", async { let db = Database::open_remote_with_connector("http://primary:8080", "", TurmoilConnector)?; let conn = db.connect()?; - dbg!(); conn.execute("create table test (x)", ()).await?; - dbg!(); conn.execute("insert into test values (42)", ()).await?; - dbg!(); async fn get_frame_no(url: &str) -> Option { let client = Client::new(); - dbg!(); Some( - dbg!(client - .get(url) - .await - .unwrap() - .json::() - .await) + client + .get(url) + .await + .unwrap() + .json::() + .await .unwrap() .get("replication_index")? .as_u64() @@ -179,7 +173,6 @@ fn sync_many_replica() { ) } - dbg!(); let primary_fno = loop { if let Some(fno) = get_frame_no("http://primary:9090/v1/namespaces/default/stats").await { @@ -187,15 +180,13 @@ fn sync_many_replica() { } }; - dbg!(); // wait for all replicas to sync let mut join_set = JoinSet::new(); for i in 0..NUM_REPLICA { join_set.spawn(async move { let uri = format!("http://replica{i}:9090/v1/namespaces/default/stats"); - dbg!(); loop { - if let Some(replica_fno) = dbg!(get_frame_no(&uri).await) { + if let Some(replica_fno) = get_frame_no(&uri).await { if replica_fno == primary_fno { break; } @@ -205,10 +196,8 @@ fn sync_many_replica() { }); } - dbg!(); while join_set.join_next().await.is_some() {} - dbg!(); for i in 0..NUM_REPLICA { let db = Database::open_remote_with_connector( format!("http://replica{i}:8080"), @@ -223,10 +212,8 @@ fn sync_many_replica() { )); } - dbg!(); let client = Client::new(); - dbg!(); let stats = client .get("http://primary:9090/v1/namespaces/default/stats") .await? 
@@ -234,14 +221,12 @@ fn sync_many_replica() { .await .unwrap(); - dbg!(); let stat = stats .get("embedded_replica_frames_replicated") .unwrap() .as_u64() .unwrap(); - dbg!(); assert_eq!(stat, 0); Ok(()) From e5b8c31005069982de27ad0b58b109929f45bf4b Mon Sep 17 00:00:00 2001 From: ad hoc Date: Tue, 6 Aug 2024 17:01:34 +0200 Subject: [PATCH 17/50] comment out libsql-wal replica configurator --- libsql-server/src/lib.rs | 34 +++--- .../configurator/libsql_wal_replica.rs | 115 +++++++++--------- .../src/namespace/configurator/mod.rs | 2 +- 3 files changed, 79 insertions(+), 72 deletions(-) diff --git a/libsql-server/src/lib.rs b/libsql-server/src/lib.rs index d26921dd00..9bf0419932 100644 --- a/libsql-server/src/lib.rs +++ b/libsql-server/src/lib.rs @@ -745,21 +745,27 @@ where }); let make_wal_manager = Arc::new(move || EitherWAL::B(wal.clone())); - let mut configurators = NamespaceConfigurators::empty(); + // let mut configurators = NamespaceConfigurators::empty(); + + // match client_config { + // Some(_) => todo!("configure replica"), + // // configure primary + // None => self.configure_primary_common( + // base_config, + // &mut configurators, + // make_wal_manager, + // migration_scheduler_handle, + // scripted_backup, + // ), + // } - match client_config { - Some(_) => todo!("configure replica"), - // configure primary - None => self.configure_primary_common( - base_config, - &mut configurators, - make_wal_manager, - migration_scheduler_handle, - scripted_backup, - ), - } - - Ok(configurators) + self.configurators_common( + base_config, + client_config, + make_wal_manager, + migration_scheduler_handle, + scripted_backup, + ) } #[cfg(feature = "durable-wal")] diff --git a/libsql-server/src/namespace/configurator/libsql_wal_replica.rs b/libsql-server/src/namespace/configurator/libsql_wal_replica.rs index f26738ec2a..6ab6cc52ef 100644 --- a/libsql-server/src/namespace/configurator/libsql_wal_replica.rs +++ 
b/libsql-server/src/namespace/configurator/libsql_wal_replica.rs @@ -46,63 +46,64 @@ impl ConfigureNamespace for LibsqlWalReplicaConfigurator { store: NamespaceStore, broadcaster: BroadcasterHandle, ) -> Pin> + Send + 'a>> { - Box::pin(async move { - tracing::debug!("creating replica namespace"); - let db_path = self.base.base_path.join("dbs").join(name.as_str()); - let channel = self.channel.clone(); - let uri = self.uri.clone(); - - let rpc_client = ReplicationLogClient::with_origin(channel.clone(), uri.clone()); - // TODO! setup replication - - let mut join_set = JoinSet::new(); - let namespace = name.clone(); - - let stats = make_stats( - &db_path, - &mut join_set, - db_config.clone(), - self.base.stats_sender.clone(), - name.clone(), - applied_frame_no_receiver.clone(), - ) - .await?; - - let connection_maker = MakeWriteProxyConn::new( - db_path.clone(), - self.base.extensions.clone(), - channel.clone(), - uri.clone(), - stats.clone(), - broadcaster, - db_config.clone(), - applied_frame_no_receiver, - self.base.max_response_size, - self.base.max_total_response_size, - primary_current_replication_index, - None, - resolve_attach_path, - self.make_wal_manager.clone(), - ) - .await? - .throttled( - self.base.max_concurrent_connections.clone(), - Some(DB_CREATE_TIMEOUT), - self.base.max_total_response_size, - self.base.max_concurrent_requests, - ); - - Ok(Namespace { - tasks: join_set, - db: Database::Replica(ReplicaDatabase { - connection_maker: Arc::new(connection_maker), - }), - name: name.clone(), - stats, - db_config_store: db_config, - path: db_path.into(), - }) - }) + todo!() + // Box::pin(async move { + // tracing::debug!("creating replica namespace"); + // let db_path = self.base.base_path.join("dbs").join(name.as_str()); + // let channel = self.channel.clone(); + // let uri = self.uri.clone(); + // + // let rpc_client = ReplicationLogClient::with_origin(channel.clone(), uri.clone()); + // // TODO! 
setup replication + // + // let mut join_set = JoinSet::new(); + // let namespace = name.clone(); + // + // let stats = make_stats( + // &db_path, + // &mut join_set, + // db_config.clone(), + // self.base.stats_sender.clone(), + // name.clone(), + // applied_frame_no_receiver.clone(), + // ) + // .await?; + // + // let connection_maker = MakeWriteProxyConn::new( + // db_path.clone(), + // self.base.extensions.clone(), + // channel.clone(), + // uri.clone(), + // stats.clone(), + // broadcaster, + // db_config.clone(), + // applied_frame_no_receiver, + // self.base.max_response_size, + // self.base.max_total_response_size, + // primary_current_replication_index, + // None, + // resolve_attach_path, + // self.make_wal_manager.clone(), + // ) + // .await? + // .throttled( + // self.base.max_concurrent_connections.clone(), + // Some(DB_CREATE_TIMEOUT), + // self.base.max_total_response_size, + // self.base.max_concurrent_requests, + // ); + // + // Ok(Namespace { + // tasks: join_set, + // db: Database::Replica(ReplicaDatabase { + // connection_maker: Arc::new(connection_maker), + // }), + // name: name.clone(), + // stats, + // db_config_store: db_config, + // path: db_path.into(), + // }) + // }) } fn cleanup<'a>( diff --git a/libsql-server/src/namespace/configurator/mod.rs b/libsql-server/src/namespace/configurator/mod.rs index 9122fc18de..0f8dcbd481 100644 --- a/libsql-server/src/namespace/configurator/mod.rs +++ b/libsql-server/src/namespace/configurator/mod.rs @@ -20,7 +20,7 @@ use super::{ pub mod fork; mod helpers; -mod libsql_wal_replica; +// mod libsql_wal_replica; mod primary; mod replica; mod schema; From 6e7fb9f06a901fe20a340cf9e54b1523c44f8eb5 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Tue, 6 Aug 2024 18:24:20 +0200 Subject: [PATCH 18/50] restore encryption config we don't actually care, but let's do it for completeness --- libsql-server/src/lib.rs | 1 + .../src/namespace/configurator/helpers.rs | 23 +++++++++++++++---- 
.../src/namespace/configurator/mod.rs | 2 ++ .../src/namespace/configurator/primary.rs | 8 ++++++- .../src/namespace/configurator/replica.rs | 1 + .../src/namespace/configurator/schema.rs | 1 + libsql-server/src/schema/scheduler.rs | 1 + 7 files changed, 31 insertions(+), 6 deletions(-) diff --git a/libsql-server/src/lib.rs b/libsql-server/src/lib.rs index 9bf0419932..4b97b442f5 100644 --- a/libsql-server/src/lib.rs +++ b/libsql-server/src/lib.rs @@ -458,6 +458,7 @@ where max_total_response_size: self.db_config.max_total_response_size, max_concurrent_connections: Arc::new(Semaphore::new(self.max_concurrent_connections)), max_concurrent_requests: self.db_config.max_concurrent_requests, + encryption_config: self.db_config.encryption_config.clone(), }; let configurators = self diff --git a/libsql-server/src/namespace/configurator/helpers.rs b/libsql-server/src/namespace/configurator/helpers.rs index a5a4c5121d..355b1b1472 100644 --- a/libsql-server/src/namespace/configurator/helpers.rs +++ b/libsql-server/src/namespace/configurator/helpers.rs @@ -9,6 +9,7 @@ use bytes::Bytes; use enclose::enclose; use futures::Stream; use libsql_sys::wal::Sqlite3WalManager; +use libsql_sys::EncryptionConfig; use tokio::io::AsyncBufReadExt as _; use tokio::sync::watch; use tokio::task::JoinSet; @@ -49,6 +50,7 @@ pub(super) async fn make_primary_connection_maker( resolve_attach_path: ResolveNamespacePathFn, broadcaster: BroadcasterHandle, make_wal_manager: Arc InnerWalManager + Sync + Send + 'static>, + encryption_config: Option, ) -> crate::Result<(PrimaryConnectionMaker, ReplicationWalWrapper, Arc)> { let db_config = meta_store_handle.get(); let bottomless_db_id = NamespaceBottomlessDbId::from_config(&db_config); @@ -102,7 +104,7 @@ pub(super) async fn make_primary_connection_maker( auto_checkpoint, primary_config.scripted_backup.clone(), name.clone(), - None, + encryption_config.clone(), )?); tracing::debug!("sending stats"); @@ -114,6 +116,7 @@ pub(super) async fn 
make_primary_connection_maker( base_config.stats_sender.clone(), name.clone(), logger.new_frame_notifier.subscribe(), + base_config.encryption_config.clone(), ) .await?; @@ -133,7 +136,7 @@ pub(super) async fn make_primary_connection_maker( base_config.max_total_response_size, auto_checkpoint, logger.new_frame_notifier.subscribe(), - None, + encryption_config, block_writes, resolve_attach_path, make_wal_manager.clone(), @@ -332,6 +335,7 @@ pub(super) async fn make_stats( stats_sender: StatsSender, name: NamespaceName, mut current_frame_no: watch::Receiver>, + encryption_config: Option, ) -> anyhow::Result> { tracing::debug!("creating stats type"); let stats = Stats::new(name.clone(), db_path, join_set).await?; @@ -358,7 +362,11 @@ pub(super) async fn make_stats( } }); - join_set.spawn(run_storage_monitor(db_path.into(), Arc::downgrade(&stats))); + join_set.spawn(run_storage_monitor( + db_path.into(), + Arc::downgrade(&stats), + encryption_config, + )); tracing::debug!("done sending stats, and creating bg tasks"); @@ -368,7 +376,11 @@ pub(super) async fn make_stats( // Periodically check the storage used by the database and save it in the Stats structure. // TODO: Once we have a separate fiber that does WAL checkpoints, running this routine // right after checkpointing is exactly where it should be done. 
-async fn run_storage_monitor(db_path: PathBuf, stats: Weak) -> anyhow::Result<()> { +async fn run_storage_monitor( + db_path: PathBuf, + stats: Weak, + encryption_config: Option, +) -> anyhow::Result<()> { // on initialization, the database file doesn't exist yet, so we wait a bit for it to be // created tokio::time::sleep(Duration::from_secs(1)).await; @@ -381,11 +393,12 @@ async fn run_storage_monitor(db_path: PathBuf, stats: Weak) -> anyhow::Re return Ok(()); }; + let encryption_config = encryption_config.clone(); let _ = tokio::task::spawn_blocking(move || { // because closing the last connection interferes with opening a new one, we lazily // initialize a connection here, and keep it alive for the entirety of the program. If we // fail to open it, we wait for `duration` and try again later. - match open_conn(&db_path, Sqlite3WalManager::new(), Some(rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY), None) { + match open_conn(&db_path, Sqlite3WalManager::new(), Some(rusqlite::OpenFlags::SQLITE_OPEN_READ_ONLY), encryption_config) { Ok(mut conn) => { if let Ok(tx) = conn.transaction() { let page_count = tx.query_row("pragma page_count;", [], |row| { row.get::(0) }); diff --git a/libsql-server/src/namespace/configurator/mod.rs b/libsql-server/src/namespace/configurator/mod.rs index 0f8dcbd481..b96d5a3824 100644 --- a/libsql-server/src/namespace/configurator/mod.rs +++ b/libsql-server/src/namespace/configurator/mod.rs @@ -5,6 +5,7 @@ use std::time::Duration; use chrono::NaiveDateTime; use futures::Future; +use libsql_sys::EncryptionConfig; use tokio::sync::Semaphore; use crate::connection::config::DatabaseConfig; @@ -38,6 +39,7 @@ pub struct BaseNamespaceConfig { pub(crate) max_total_response_size: u64, pub(crate) max_concurrent_connections: Arc, pub(crate) max_concurrent_requests: u64, + pub(crate) encryption_config: Option, } #[derive(Clone)] diff --git a/libsql-server/src/namespace/configurator/primary.rs b/libsql-server/src/namespace/configurator/primary.rs index 
6c245a6e8f..03cdd2fd7b 100644 --- a/libsql-server/src/namespace/configurator/primary.rs +++ b/libsql-server/src/namespace/configurator/primary.rs @@ -1,7 +1,10 @@ +use std::path::Path; +use std::pin::Pin; use std::sync::atomic::{AtomicBool, Ordering}; -use std::{path::Path, pin::Pin, sync::Arc}; +use std::sync::Arc; use futures::prelude::Future; +use libsql_sys::EncryptionConfig; use tokio::task::JoinSet; use crate::connection::config::DatabaseConfig; @@ -49,6 +52,7 @@ impl PrimaryConfigurator { resolve_attach_path: ResolveNamespacePathFn, db_path: Arc, broadcaster: BroadcasterHandle, + encryption_config: Option, ) -> crate::Result { let mut join_set = JoinSet::new(); @@ -67,6 +71,7 @@ impl PrimaryConfigurator { resolve_attach_path, broadcaster, self.make_wal_manager.clone(), + encryption_config, ) .await?; let connection_maker = Arc::new(connection_maker); @@ -135,6 +140,7 @@ impl ConfigureNamespace for PrimaryConfigurator { resolve_attach_path, db_path.clone(), broadcaster, + self.base.encryption_config.clone(), ) .await { diff --git a/libsql-server/src/namespace/configurator/replica.rs b/libsql-server/src/namespace/configurator/replica.rs index 61dd48b0bf..84ebadb897 100644 --- a/libsql-server/src/namespace/configurator/replica.rs +++ b/libsql-server/src/namespace/configurator/replica.rs @@ -169,6 +169,7 @@ impl ConfigureNamespace for ReplicaConfigurator { self.base.stats_sender.clone(), name.clone(), applied_frame_no_receiver.clone(), + self.base.encryption_config.clone(), ) .await?; diff --git a/libsql-server/src/namespace/configurator/schema.rs b/libsql-server/src/namespace/configurator/schema.rs index 98e679513a..f95c8abf51 100644 --- a/libsql-server/src/namespace/configurator/schema.rs +++ b/libsql-server/src/namespace/configurator/schema.rs @@ -68,6 +68,7 @@ impl ConfigureNamespace for SchemaConfigurator { resolve_attach_path, broadcaster, self.make_wal_manager.clone(), + self.base.encryption_config.clone(), ) .await?; diff --git 
a/libsql-server/src/schema/scheduler.rs b/libsql-server/src/schema/scheduler.rs index 57916bb9a5..01a3d795d8 100644 --- a/libsql-server/src/schema/scheduler.rs +++ b/libsql-server/src/schema/scheduler.rs @@ -917,6 +917,7 @@ mod test { max_total_response_size: 100000000000, max_concurrent_connections: Arc::new(Semaphore::new(10)), max_concurrent_requests: 10000, + encryption_config: None, }; let primary_config = PrimaryExtraConfig { From 71c50e198fe0e61880cd2d5917af351c34a9f21e Mon Sep 17 00:00:00 2001 From: Lucio Franco Date: Tue, 6 Aug 2024 08:54:23 -0400 Subject: [PATCH 19/50] enable more windows CI --- .github/workflows/rust.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/rust.yml b/.github/workflows/rust.yml index 26eaba46cf..1903c0baff 100644 --- a/.github/workflows/rust.yml +++ b/.github/workflows/rust.yml @@ -159,8 +159,8 @@ jobs: target/ key: ${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }} restore-keys: ${{ runner.os }}-cargo- - - name: check libsql remote - run: cargo check -p libsql --no-default-features -F remote + - name: build libsql all features + run: cargo build -p libsql --all-features # test-rust-wasm: # runs-on: ubuntu-latest From 07dc9b5b6d36e6eeae188c9a330df34bb99337ce Mon Sep 17 00:00:00 2001 From: ad hoc Date: Wed, 7 Aug 2024 13:04:09 +0200 Subject: [PATCH 20/50] add LibsqlWalFooter --- libsql-wal/src/lib.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/libsql-wal/src/lib.rs b/libsql-wal/src/lib.rs index df104eda49..d46ade0010 100644 --- a/libsql-wal/src/lib.rs +++ b/libsql-wal/src/lib.rs @@ -15,6 +15,22 @@ const LIBSQL_MAGIC: u64 = u64::from_be_bytes(*b"LIBSQL\0\0"); const LIBSQL_PAGE_SIZE: u16 = 4096; const LIBSQL_WAL_VERSION: u16 = 1; +use zerocopy::byteorder::big_endian::{U64 as bu64, U16 as bu16}; +/// LibsqlFooter is located at the end of the libsql file. 
I contains libsql specific metadata, +/// while remaining fully compatible with sqlite (which just ignores that footer) +/// +/// The fields are in big endian to remain coherent with sqlite +#[derive(Copy, Clone, Debug, zerocopy::FromBytes, zerocopy::FromZeroes, zerocopy::AsBytes)] +#[repr(C)] +pub struct LibsqlFooter { + magic: bu64, + version: bu16, + /// Replication index checkpointed into this file. + /// only valid if there are no outstanding segments to checkpoint, since a checkpoint could be + /// partial. + replication_index: bu64, +} + #[cfg(any(debug_assertions, test))] pub mod test { use std::fs::OpenOptions; From 4069036362f41cc9015dab4a5698f6790e57e1f1 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Wed, 7 Aug 2024 13:48:40 +0200 Subject: [PATCH 21/50] cancel query when request is dropped --- libsql-server/src/connection/libsql.rs | 49 ++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 2 deletions(-) diff --git a/libsql-server/src/connection/libsql.rs b/libsql-server/src/connection/libsql.rs index aadff6190b..1acbbf2588 100644 --- a/libsql-server/src/connection/libsql.rs +++ b/libsql-server/src/connection/libsql.rs @@ -391,14 +391,43 @@ where ctx: RequestContext, builder: B, ) -> Result<(B, Program)> { + struct Bomb { + canceled: Arc, + defused: bool, + } + + impl Drop for Bomb { + fn drop(&mut self) { + if !self.defused { + tracing::debug!("cancelling request"); + self.canceled.store(true, Ordering::Relaxed); + } + } + } + + let canceled = { + let cancelled = self.inner.lock().canceled.clone(); + cancelled.store(false, Ordering::Relaxed); + cancelled + }; + + let mut bomb = Bomb { + canceled, + defused: false, + }; + PROGRAM_EXEC_COUNT.increment(1); check_program_auth(&ctx, &pgm, &self.inner.lock().config_store.get())?; let conn = self.inner.clone(); - BLOCKING_RT + let ret = BLOCKING_RT .spawn_blocking(move || Connection::run(conn, pgm, builder)) .await - .unwrap() + .unwrap(); + + bomb.defused = true; + + ret } } @@ -413,6 +442,7 @@ pub(super) 
struct Connection { forced_rollback: bool, broadcaster: BroadcasterHandle, hooked: bool, + canceled: Arc, } fn update_stats( @@ -475,6 +505,19 @@ impl Connection { ); } + let canceled = Arc::new(AtomicBool::new(false)); + + conn.progress_handler(100, { + let canceled = canceled.clone(); + Some(move || { + let canceled = canceled.load(Ordering::Relaxed); + if canceled { + tracing::debug!("request canceled"); + } + canceled + }) + }); + let this = Self { conn, stats, @@ -486,6 +529,7 @@ impl Connection { forced_rollback: false, broadcaster, hooked: false, + canceled, }; for ext in extensions.iter() { @@ -795,6 +839,7 @@ mod test { forced_rollback: false, broadcaster: Default::default(), hooked: false, + canceled: Arc::new(false.into()), }; let conn = Arc::new(Mutex::new(conn)); From 5924766712c783136b634ff325238a0fb1858cae Mon Sep 17 00:00:00 2001 From: ad hoc Date: Wed, 7 Aug 2024 13:15:12 +0200 Subject: [PATCH 22/50] write footer on checkpoint --- libsql-wal/src/lib.rs | 8 ++++---- libsql-wal/src/segment/list.rs | 18 +++++++++++++++++- 2 files changed, 21 insertions(+), 5 deletions(-) diff --git a/libsql-wal/src/lib.rs b/libsql-wal/src/lib.rs index d46ade0010..1c0dc63566 100644 --- a/libsql-wal/src/lib.rs +++ b/libsql-wal/src/lib.rs @@ -15,7 +15,7 @@ const LIBSQL_MAGIC: u64 = u64::from_be_bytes(*b"LIBSQL\0\0"); const LIBSQL_PAGE_SIZE: u16 = 4096; const LIBSQL_WAL_VERSION: u16 = 1; -use zerocopy::byteorder::big_endian::{U64 as bu64, U16 as bu16}; +use zerocopy::byteorder::big_endian::{U16 as bu16, U64 as bu64}; /// LibsqlFooter is located at the end of the libsql file. 
I contains libsql specific metadata, /// while remaining fully compatible with sqlite (which just ignores that footer) /// @@ -23,12 +23,12 @@ use zerocopy::byteorder::big_endian::{U64 as bu64, U16 as bu16}; #[derive(Copy, Clone, Debug, zerocopy::FromBytes, zerocopy::FromZeroes, zerocopy::AsBytes)] #[repr(C)] pub struct LibsqlFooter { - magic: bu64, - version: bu16, + pub magic: bu64, + pub version: bu16, /// Replication index checkpointed into this file. /// only valid if there are no outstanding segments to checkpoint, since a checkpoint could be /// partial. - replication_index: bu64, + pub replication_index: bu64, } #[cfg(any(debug_assertions, test))] diff --git a/libsql-wal/src/segment/list.rs b/libsql-wal/src/segment/list.rs index 25dfa3a32a..f1e3252161 100644 --- a/libsql-wal/src/segment/list.rs +++ b/libsql-wal/src/segment/list.rs @@ -15,6 +15,7 @@ use crate::error::Result; use crate::io::buf::{ZeroCopyBoxIoBuf, ZeroCopyBuf}; use crate::io::FileExt; use crate::segment::Frame; +use crate::{LibsqlFooter, LIBSQL_MAGIC, LIBSQL_PAGE_SIZE, LIBSQL_WAL_VERSION}; use super::Segment; @@ -157,6 +158,21 @@ where buf = read_buf.into_inner(); } + // update the footer at the end of the db file. + let footer = LibsqlFooter { + magic: LIBSQL_MAGIC.into(), + version: LIBSQL_WAL_VERSION.into(), + replication_index: last_replication_index.into(), + }; + + let footer_offset = size_after as usize * LIBSQL_PAGE_SIZE as usize; + let (_, ret) = db_file + .write_all_at_async(ZeroCopyBuf::new_init(footer), footer_offset as u64) + .await; + ret?; + + // todo: truncate if necessary + //// todo: make async db_file.sync_all()?; @@ -185,7 +201,7 @@ where Ok(Some(last_replication_index)) } - /// returnsstream pages from the sealed segment list, and what's the lowest replication index + /// returns a stream of pages from the sealed segment list, and what's the lowest replication index /// that was covered. 
If the returned index is less than start frame_no, the missing frames /// must be read somewhere else. pub async fn stream_pages_from<'a>( From d11ec010df9a049a17eb201785abaa6d8219f9d5 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Wed, 7 Aug 2024 15:38:56 +0200 Subject: [PATCH 23/50] downgrade debug to trace --- libsql-server/src/connection/libsql.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libsql-server/src/connection/libsql.rs b/libsql-server/src/connection/libsql.rs index 1acbbf2588..d98d6d0f82 100644 --- a/libsql-server/src/connection/libsql.rs +++ b/libsql-server/src/connection/libsql.rs @@ -399,7 +399,7 @@ where impl Drop for Bomb { fn drop(&mut self) { if !self.defused { - tracing::debug!("cancelling request"); + tracing::trace!("cancelling request"); self.canceled.store(true, Ordering::Relaxed); } } @@ -512,7 +512,7 @@ impl Connection { Some(move || { let canceled = canceled.load(Ordering::Relaxed); if canceled { - tracing::debug!("request canceled"); + tracing::trace!("request canceled"); } canceled }) From fc178de41fe56fd3ba845a9dc09bf71b6bf1b2d7 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Wed, 7 Aug 2024 15:42:01 +0200 Subject: [PATCH 24/50] add query canceled metric --- libsql-server/src/connection/libsql.rs | 5 ++++- libsql-server/src/metrics.rs | 5 +++++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/libsql-server/src/connection/libsql.rs b/libsql-server/src/connection/libsql.rs index d98d6d0f82..aa0604f03a 100644 --- a/libsql-server/src/connection/libsql.rs +++ b/libsql-server/src/connection/libsql.rs @@ -15,7 +15,9 @@ use tokio::sync::watch; use tokio::time::{Duration, Instant}; use crate::error::Error; -use crate::metrics::{DESCRIBE_COUNT, PROGRAM_EXEC_COUNT, VACUUM_COUNT, WAL_CHECKPOINT_COUNT}; +use crate::metrics::{ + DESCRIBE_COUNT, PROGRAM_EXEC_COUNT, QUERY_CANCELED, VACUUM_COUNT, WAL_CHECKPOINT_COUNT, +}; use crate::namespace::broadcasters::BroadcasterHandle; use 
crate::namespace::meta_store::MetaStoreHandle; use crate::namespace::ResolveNamespacePathFn; @@ -512,6 +514,7 @@ impl Connection { Some(move || { let canceled = canceled.load(Ordering::Relaxed); if canceled { + QUERY_CANCELED.increment(1); tracing::trace!("request canceled"); } canceled diff --git a/libsql-server/src/metrics.rs b/libsql-server/src/metrics.rs index a71b5ca979..1ac97435b3 100644 --- a/libsql-server/src/metrics.rs +++ b/libsql-server/src/metrics.rs @@ -153,3 +153,8 @@ pub static LISTEN_EVENTS_DROPPED: Lazy = Lazy::new(|| { describe_counter!(NAME, "Number of listen events dropped"); register_counter!(NAME) }); +pub static QUERY_CANCELED: Lazy = Lazy::new(|| { + const NAME: &str = "libsql_server_query_canceled"; + describe_counter!(NAME, "Number of canceled queries"); + register_counter!(NAME) +}); From 351e6ebfbec97e0a2c12f2d056f2876cb0058e38 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Wed, 7 Aug 2024 18:52:57 +0400 Subject: [PATCH 25/50] add simple integration test --- libsql/tests/integration_tests.rs | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/libsql/tests/integration_tests.rs b/libsql/tests/integration_tests.rs index 0f8e575949..cc239888d0 100644 --- a/libsql/tests/integration_tests.rs +++ b/libsql/tests/integration_tests.rs @@ -596,6 +596,22 @@ async fn debug_print_row() { ); } +#[tokio::test] +async fn fts5_invalid_tokenizer() { + let db = Database::open(":memory:").unwrap(); + let conn = db.connect().unwrap(); + assert!(conn.execute( + "CREATE VIRTUAL TABLE t USING fts5(s, tokenize='trigram case_sensitive ')", + (), + ) + .await.is_err()); + assert!(conn.execute( + "CREATE VIRTUAL TABLE t USING fts5(s, tokenize='trigram remove_diacritics ')", + (), + ) + .await.is_err()); +} + #[cfg(feature = "serde")] #[tokio::test] async fn deserialize_row() { From 3e56d28d8614a070dd632c6d54b0cdd1d2e08579 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Wed, 7 Aug 2024 16:57:49 +0400 Subject: [PATCH 26/50] fix potential 
crash in fts5 - see: https://sqlite.org/forum/forumpost/171bcc2bcd --- libsql-sqlite3/ext/fts5/fts5_tokenize.c | 60 ++++++++++++++----------- 1 file changed, 33 insertions(+), 27 deletions(-) diff --git a/libsql-sqlite3/ext/fts5/fts5_tokenize.c b/libsql-sqlite3/ext/fts5/fts5_tokenize.c index f12056170f..7e239b6ca5 100644 --- a/libsql-sqlite3/ext/fts5/fts5_tokenize.c +++ b/libsql-sqlite3/ext/fts5/fts5_tokenize.c @@ -1290,40 +1290,46 @@ static int fts5TriCreate( Fts5Tokenizer **ppOut ){ int rc = SQLITE_OK; - TrigramTokenizer *pNew = (TrigramTokenizer*)sqlite3_malloc(sizeof(*pNew)); - UNUSED_PARAM(pUnused); - if( pNew==0 ){ - rc = SQLITE_NOMEM; + TrigramTokenizer *pNew = 0; + + if( nArg%2 ){ + rc = SQLITE_ERROR; }else{ - int i; - pNew->bFold = 1; - pNew->iFoldParam = 0; - for(i=0; rc==SQLITE_OK && ibFold = 1; + pNew->iFoldParam = 0; + for(i=0; rc==SQLITE_OK && ibFold = (zArg[0]=='0'); + } + }else if( 0==sqlite3_stricmp(azArg[i], "remove_diacritics") ){ + if( (zArg[0]!='0' && zArg[0]!='1' && zArg[0]!='2') || zArg[1] ){ + rc = SQLITE_ERROR; + }else{ + pNew->iFoldParam = (zArg[0]!='0') ? 2 : 0; + } }else{ - pNew->bFold = (zArg[0]=='0'); - } - }else if( 0==sqlite3_stricmp(azArg[i], "remove_diacritics") ){ - if( (zArg[0]!='0' && zArg[0]!='1' && zArg[0]!='2') || zArg[1] ){ rc = SQLITE_ERROR; - }else{ - pNew->iFoldParam = (zArg[0]!='0') ? 
2 : 0; } - }else{ - rc = SQLITE_ERROR; } - } - if( pNew->iFoldParam!=0 && pNew->bFold==0 ){ - rc = SQLITE_ERROR; - } + if( pNew->iFoldParam!=0 && pNew->bFold==0 ){ + rc = SQLITE_ERROR; + } - if( rc!=SQLITE_OK ){ - fts5TriDelete((Fts5Tokenizer*)pNew); - pNew = 0; + if( rc!=SQLITE_OK ){ + fts5TriDelete((Fts5Tokenizer*)pNew); + pNew = 0; + } } } *ppOut = (Fts5Tokenizer*)pNew; From 7ed14683a177ad913d63f06d5d6848846c6a0d00 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Wed, 7 Aug 2024 18:53:13 +0400 Subject: [PATCH 27/50] build bundles --- .../SQLite3MultipleCiphers/src/sqlite3.c | 61 +++++++++++-------- libsql-ffi/bundled/bindings/bindgen.rs | 16 +++-- libsql-ffi/bundled/src/sqlite3.c | 61 +++++++++++-------- 3 files changed, 80 insertions(+), 58 deletions(-) diff --git a/libsql-ffi/bundled/SQLite3MultipleCiphers/src/sqlite3.c b/libsql-ffi/bundled/SQLite3MultipleCiphers/src/sqlite3.c index 529af0d52e..d7587cc38b 100644 --- a/libsql-ffi/bundled/SQLite3MultipleCiphers/src/sqlite3.c +++ b/libsql-ffi/bundled/SQLite3MultipleCiphers/src/sqlite3.c @@ -28,6 +28,7 @@ ** README.md ** configure ** configure.ac +** ext/fts5/fts5_tokenize.c ** ext/jni/src/org/sqlite/jni/capi/CollationNeededCallback.java ** ext/jni/src/org/sqlite/jni/capi/CommitHookCallback.java ** ext/jni/src/org/sqlite/jni/capi/PreupdateHookCallback.java @@ -259750,40 +259751,46 @@ static int fts5TriCreate( Fts5Tokenizer **ppOut ){ int rc = SQLITE_OK; - TrigramTokenizer *pNew = (TrigramTokenizer*)sqlite3_malloc(sizeof(*pNew)); - UNUSED_PARAM(pUnused); - if( pNew==0 ){ - rc = SQLITE_NOMEM; + TrigramTokenizer *pNew = 0; + + if( nArg%2 ){ + rc = SQLITE_ERROR; }else{ - int i; - pNew->bFold = 1; - pNew->iFoldParam = 0; - for(i=0; rc==SQLITE_OK && ibFold = 1; + pNew->iFoldParam = 0; + for(i=0; rc==SQLITE_OK && ibFold = (zArg[0]=='0'); + } + }else if( 0==sqlite3_stricmp(azArg[i], "remove_diacritics") ){ + if( (zArg[0]!='0' && zArg[0]!='1' && zArg[0]!='2') || zArg[1] ){ + rc = SQLITE_ERROR; + }else{ + 
pNew->iFoldParam = (zArg[0]!='0') ? 2 : 0; + } }else{ - pNew->bFold = (zArg[0]=='0'); - } - }else if( 0==sqlite3_stricmp(azArg[i], "remove_diacritics") ){ - if( (zArg[0]!='0' && zArg[0]!='1' && zArg[0]!='2') || zArg[1] ){ rc = SQLITE_ERROR; - }else{ - pNew->iFoldParam = (zArg[0]!='0') ? 2 : 0; } - }else{ - rc = SQLITE_ERROR; } - } - if( pNew->iFoldParam!=0 && pNew->bFold==0 ){ - rc = SQLITE_ERROR; - } + if( pNew->iFoldParam!=0 && pNew->bFold==0 ){ + rc = SQLITE_ERROR; + } - if( rc!=SQLITE_OK ){ - fts5TriDelete((Fts5Tokenizer*)pNew); - pNew = 0; + if( rc!=SQLITE_OK ){ + fts5TriDelete((Fts5Tokenizer*)pNew); + pNew = 0; + } } } *ppOut = (Fts5Tokenizer*)pNew; diff --git a/libsql-ffi/bundled/bindings/bindgen.rs b/libsql-ffi/bundled/bindings/bindgen.rs index 9dec505c10..cc73807f33 100644 --- a/libsql-ffi/bundled/bindings/bindgen.rs +++ b/libsql-ffi/bundled/bindings/bindgen.rs @@ -940,7 +940,7 @@ extern "C" { extern "C" { pub fn sqlite3_vmprintf( arg1: *const ::std::os::raw::c_char, - arg2: va_list, + arg2: *mut __va_list_tag, ) -> *mut ::std::os::raw::c_char; } extern "C" { @@ -956,7 +956,7 @@ extern "C" { arg1: ::std::os::raw::c_int, arg2: *mut ::std::os::raw::c_char, arg3: *const ::std::os::raw::c_char, - arg4: va_list, + arg4: *mut __va_list_tag, ) -> *mut ::std::os::raw::c_char; } extern "C" { @@ -2503,7 +2503,7 @@ extern "C" { pub fn sqlite3_str_vappendf( arg1: *mut sqlite3_str, zFormat: *const ::std::os::raw::c_char, - arg2: va_list, + arg2: *mut __va_list_tag, ); } extern "C" { @@ -3524,4 +3524,12 @@ extern "C" { extern "C" { pub static sqlite3_wal_manager: libsql_wal_manager; } -pub type __builtin_va_list = *mut ::std::os::raw::c_char; +pub type __builtin_va_list = [__va_list_tag; 1usize]; +#[repr(C)] +#[derive(Debug, Copy, Clone)] +pub struct __va_list_tag { + pub gp_offset: ::std::os::raw::c_uint, + pub fp_offset: ::std::os::raw::c_uint, + pub overflow_arg_area: *mut ::std::os::raw::c_void, + pub reg_save_area: *mut ::std::os::raw::c_void, +} diff --git 
a/libsql-ffi/bundled/src/sqlite3.c b/libsql-ffi/bundled/src/sqlite3.c index 529af0d52e..d7587cc38b 100644 --- a/libsql-ffi/bundled/src/sqlite3.c +++ b/libsql-ffi/bundled/src/sqlite3.c @@ -28,6 +28,7 @@ ** README.md ** configure ** configure.ac +** ext/fts5/fts5_tokenize.c ** ext/jni/src/org/sqlite/jni/capi/CollationNeededCallback.java ** ext/jni/src/org/sqlite/jni/capi/CommitHookCallback.java ** ext/jni/src/org/sqlite/jni/capi/PreupdateHookCallback.java @@ -259750,40 +259751,46 @@ static int fts5TriCreate( Fts5Tokenizer **ppOut ){ int rc = SQLITE_OK; - TrigramTokenizer *pNew = (TrigramTokenizer*)sqlite3_malloc(sizeof(*pNew)); - UNUSED_PARAM(pUnused); - if( pNew==0 ){ - rc = SQLITE_NOMEM; + TrigramTokenizer *pNew = 0; + + if( nArg%2 ){ + rc = SQLITE_ERROR; }else{ - int i; - pNew->bFold = 1; - pNew->iFoldParam = 0; - for(i=0; rc==SQLITE_OK && ibFold = 1; + pNew->iFoldParam = 0; + for(i=0; rc==SQLITE_OK && ibFold = (zArg[0]=='0'); + } + }else if( 0==sqlite3_stricmp(azArg[i], "remove_diacritics") ){ + if( (zArg[0]!='0' && zArg[0]!='1' && zArg[0]!='2') || zArg[1] ){ + rc = SQLITE_ERROR; + }else{ + pNew->iFoldParam = (zArg[0]!='0') ? 2 : 0; + } }else{ - pNew->bFold = (zArg[0]=='0'); - } - }else if( 0==sqlite3_stricmp(azArg[i], "remove_diacritics") ){ - if( (zArg[0]!='0' && zArg[0]!='1' && zArg[0]!='2') || zArg[1] ){ rc = SQLITE_ERROR; - }else{ - pNew->iFoldParam = (zArg[0]!='0') ? 
2 : 0; } - }else{ - rc = SQLITE_ERROR; } - } - if( pNew->iFoldParam!=0 && pNew->bFold==0 ){ - rc = SQLITE_ERROR; - } + if( pNew->iFoldParam!=0 && pNew->bFold==0 ){ + rc = SQLITE_ERROR; + } - if( rc!=SQLITE_OK ){ - fts5TriDelete((Fts5Tokenizer*)pNew); - pNew = 0; + if( rc!=SQLITE_OK ){ + fts5TriDelete((Fts5Tokenizer*)pNew); + pNew = 0; + } } } *ppOut = (Fts5Tokenizer*)pNew; From 9595315d30ee56d9845e884fd3fae768352967b9 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Wed, 7 Aug 2024 20:42:27 +0200 Subject: [PATCH 28/50] init cancel bomb berfore query exec --- libsql-server/src/connection/libsql.rs | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/libsql-server/src/connection/libsql.rs b/libsql-server/src/connection/libsql.rs index aa0604f03a..9896164e55 100644 --- a/libsql-server/src/connection/libsql.rs +++ b/libsql-server/src/connection/libsql.rs @@ -413,14 +413,15 @@ where cancelled }; + PROGRAM_EXEC_COUNT.increment(1); + + check_program_auth(&ctx, &pgm, &self.inner.lock().config_store.get())?; + + // create the bomb right before spawning the blocking task. 
let mut bomb = Bomb { canceled, defused: false, }; - - PROGRAM_EXEC_COUNT.increment(1); - - check_program_auth(&ctx, &pgm, &self.inner.lock().config_store.get())?; let conn = self.inner.clone(); let ret = BLOCKING_RT .spawn_blocking(move || Connection::run(conn, pgm, builder)) From 0d411057ae5d42bffd1e451bfc2e5aa44ab72042 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Thu, 8 Aug 2024 00:33:27 +0400 Subject: [PATCH 29/50] cargo fmt --- libsql/tests/integration_tests.rs | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/libsql/tests/integration_tests.rs b/libsql/tests/integration_tests.rs index cc239888d0..cdb0a985c3 100644 --- a/libsql/tests/integration_tests.rs +++ b/libsql/tests/integration_tests.rs @@ -600,16 +600,20 @@ async fn debug_print_row() { async fn fts5_invalid_tokenizer() { let db = Database::open(":memory:").unwrap(); let conn = db.connect().unwrap(); - assert!(conn.execute( - "CREATE VIRTUAL TABLE t USING fts5(s, tokenize='trigram case_sensitive ')", - (), - ) - .await.is_err()); - assert!(conn.execute( - "CREATE VIRTUAL TABLE t USING fts5(s, tokenize='trigram remove_diacritics ')", - (), - ) - .await.is_err()); + assert!(conn + .execute( + "CREATE VIRTUAL TABLE t USING fts5(s, tokenize='trigram case_sensitive ')", + (), + ) + .await + .is_err()); + assert!(conn + .execute( + "CREATE VIRTUAL TABLE t USING fts5(s, tokenize='trigram remove_diacritics ')", + (), + ) + .await + .is_err()); } #[cfg(feature = "serde")] From 4085a0d35edd59c0c75bb917f4236dcdeeb0f574 Mon Sep 17 00:00:00 2001 From: Lucio Franco Date: Wed, 7 Aug 2024 17:40:39 -0400 Subject: [PATCH 30/50] libsql: downgrade failed prefetch log to debug --- libsql/src/replication/remote_client.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libsql/src/replication/remote_client.rs b/libsql/src/replication/remote_client.rs index dbab056938..d0052f50d9 100644 --- a/libsql/src/replication/remote_client.rs +++ 
b/libsql/src/replication/remote_client.rs @@ -135,7 +135,7 @@ impl RemoteClient { (hello_fut.await, None) }; self.prefetched_batch_log_entries = if let Ok(true) = hello.0 { - tracing::warn!( + tracing::debug!( "Frames prefetching failed because of new session token returned by handshake" ); None From b0bc6eb2f5686b2e1706dd06b51405e8f0257ecd Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Thu, 8 Aug 2024 12:13:13 +0400 Subject: [PATCH 31/50] publish sqld debug builds to the separate image name --- .github/workflows/publish-server.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/publish-server.yml b/.github/workflows/publish-server.yml index 10820457b8..d957195e40 100644 --- a/.github/workflows/publish-server.yml +++ b/.github/workflows/publish-server.yml @@ -118,7 +118,7 @@ jobs: context: . platforms: ${{ env.platform }} labels: ${{ steps.meta.outputs.labels }} - outputs: type=image,name=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }},push-by-digest=true,name-canonical=true,push=true + outputs: type=image,name=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}-debug,push-by-digest=true,name-canonical=true,push=true build-args: | BUILD_DEBUG=true - From 51b1b490545524e846e52479743054fbbe2e1660 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Thu, 8 Aug 2024 13:23:09 +0400 Subject: [PATCH 32/50] remove digests artifacts from debug build step --- .github/workflows/publish-server.yml | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/.github/workflows/publish-server.yml b/.github/workflows/publish-server.yml index d957195e40..e1973fe47c 100644 --- a/.github/workflows/publish-server.yml +++ b/.github/workflows/publish-server.yml @@ -121,20 +121,6 @@ jobs: outputs: type=image,name=${{ env.REGISTRY }}/${{ env.IMAGE_NAME }}-debug,push-by-digest=true,name-canonical=true,push=true build-args: | BUILD_DEBUG=true - - - name: Export digest - run: | - mkdir -p /tmp/digests - digest="${{ steps.build.outputs.digest }}" - touch 
"/tmp/digests/${digest#sha256:}" - - - name: Upload digest - uses: actions/upload-artifact@v4 - with: - name: digests-debug-${{ env.PLATFORM_PAIR }} - path: /tmp/digests/* - if-no-files-found: error - retention-days: 1 build-arm64: permissions: write-all From ec7bca5a20eb413ba658eef04e643d94f6fa562a Mon Sep 17 00:00:00 2001 From: wyhaya Date: Thu, 8 Aug 2024 12:24:36 +0800 Subject: [PATCH 33/50] Fix JSON f64 precision --- libsql/Cargo.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libsql/Cargo.toml b/libsql/Cargo.toml index efae2abea3..3d65f71c73 100644 --- a/libsql/Cargo.toml +++ b/libsql/Cargo.toml @@ -20,7 +20,7 @@ hyper = { workspace = true, features = ["client", "stream"], optional = true } hyper-rustls = { version = "0.25", features = ["webpki-roots"], optional = true } base64 = { version = "0.21", optional = true } serde = { version = "1", features = ["derive"], optional = true } -serde_json = { version = "1", optional = true } +serde_json = { version = "1", features = ["float_roundtrip"], optional = true } async-trait = "0.1" bitflags = { version = "2.4.0", optional = true } tower = { workspace = true, features = ["util"], optional = true } From 5eeba4330901f022963bf50cf35e3e7120cf1a09 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Thu, 8 Aug 2024 13:18:51 +0400 Subject: [PATCH 34/50] improve random row selection --- libsql-sqlite3/src/vectordiskann.c | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/libsql-sqlite3/src/vectordiskann.c b/libsql-sqlite3/src/vectordiskann.c index 95d473b630..8804aee119 100644 --- a/libsql-sqlite3/src/vectordiskann.c +++ b/libsql-sqlite3/src/vectordiskann.c @@ -442,6 +442,7 @@ int diskAnnCreateIndex( int type, dims; u64 maxNeighborsParam, blockSizeBytes; char *zSql; + const char *zRowidColumnName; char columnSqlDefs[VECTOR_INDEX_SQL_RENDER_LIMIT]; // definition of columns (e.g. index_key INTEGER BINARY, index_key1 TEXT, ...) 
char columnSqlNames[VECTOR_INDEX_SQL_RENDER_LIMIT]; // just column names (e.g. index_key, index_key1, index_key2, ...) if( vectorIdxKeyDefsRender(pKey, "index_key", columnSqlDefs, sizeof(columnSqlDefs)) != 0 ){ @@ -509,6 +510,7 @@ int diskAnnCreateIndex( columnSqlDefs, columnSqlNames ); + zRowidColumnName = "index_key"; }else{ zSql = sqlite3MPrintf( db, @@ -518,9 +520,31 @@ int diskAnnCreateIndex( columnSqlDefs, columnSqlNames ); + zRowidColumnName = "rowid"; } rc = sqlite3_exec(db, zSql, 0, 0, 0); sqlite3DbFree(db, zSql); + if( rc != SQLITE_OK ){ + return rc; + } + /* + * vector blobs are usually pretty huge (more than a page size, for example, node block for 1024d f32 embeddings with 1bit compression will occupy ~20KB) + * in this case, main table B-Tree takes on redundant shape where all leaf nodes has only 1 cell + * + * as we have a query which selects random row using OFFSET/LIMIT trick - we will need to read all these leaf nodes pages just to skip them + * so, in order to remove this overhead for random row selection - we creating an index with just single column used + * in this case B-Tree leafs will be full of rowids and the overhead for page reads will be very small + */ + zSql = sqlite3MPrintf( + db, + "CREATE INDEX IF NOT EXISTS \"%w\".%s_shadow_idx ON %s_shadow (%s)", + zDbSName, + zIdxName, + zIdxName, + zRowidColumnName + ); + rc = sqlite3_exec(db, zSql, 0, 0, 0); + sqlite3DbFree(db, zSql); return rc; } From 4b3e7e7544e68a03ec04f38a686791bb76886fd3 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Thu, 8 Aug 2024 14:20:41 +0400 Subject: [PATCH 35/50] fix random row selection query to have db name --- libsql-sqlite3/src/vectordiskann.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libsql-sqlite3/src/vectordiskann.c b/libsql-sqlite3/src/vectordiskann.c index 8804aee119..fc39e00d30 100644 --- a/libsql-sqlite3/src/vectordiskann.c +++ b/libsql-sqlite3/src/vectordiskann.c @@ -574,8 +574,8 @@ static int 
diskAnnSelectRandomShadowRow(const DiskAnnIndex *pIndex, u64 *pRowid) zSql = sqlite3MPrintf( pIndex->db, - "SELECT rowid FROM \"%w\".%s LIMIT 1 OFFSET ABS(RANDOM()) %% MAX((SELECT COUNT(*) FROM %s), 1)", - pIndex->zDbSName, pIndex->zShadow, pIndex->zShadow + "SELECT rowid FROM \"%w\".%s LIMIT 1 OFFSET ABS(RANDOM()) %% MAX((SELECT COUNT(*) FROM \"%w\".%s), 1)", + pIndex->zDbSName, pIndex->zShadow, pIndex->zDbSName, pIndex->zShadow ); if( zSql == NULL ){ rc = SQLITE_NOMEM_BKPT; From 0b41b5aa7c93639218caf6f765d6c4d26e0b0567 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Thu, 8 Aug 2024 14:33:31 +0400 Subject: [PATCH 36/50] build bundles --- .../SQLite3MultipleCiphers/src/sqlite3.c | 28 +++++++++++++++++-- libsql-ffi/bundled/src/sqlite3.c | 28 +++++++++++++++++-- 2 files changed, 52 insertions(+), 4 deletions(-) diff --git a/libsql-ffi/bundled/SQLite3MultipleCiphers/src/sqlite3.c b/libsql-ffi/bundled/SQLite3MultipleCiphers/src/sqlite3.c index d7587cc38b..15d09606fb 100644 --- a/libsql-ffi/bundled/SQLite3MultipleCiphers/src/sqlite3.c +++ b/libsql-ffi/bundled/SQLite3MultipleCiphers/src/sqlite3.c @@ -212002,6 +212002,7 @@ int diskAnnCreateIndex( int type, dims; u64 maxNeighborsParam, blockSizeBytes; char *zSql; + const char *zRowidColumnName; char columnSqlDefs[VECTOR_INDEX_SQL_RENDER_LIMIT]; // definition of columns (e.g. index_key INTEGER BINARY, index_key1 TEXT, ...) char columnSqlNames[VECTOR_INDEX_SQL_RENDER_LIMIT]; // just column names (e.g. index_key, index_key1, index_key2, ...) 
if( vectorIdxKeyDefsRender(pKey, "index_key", columnSqlDefs, sizeof(columnSqlDefs)) != 0 ){ @@ -212069,6 +212070,7 @@ int diskAnnCreateIndex( columnSqlDefs, columnSqlNames ); + zRowidColumnName = "index_key"; }else{ zSql = sqlite3MPrintf( db, @@ -212078,9 +212080,31 @@ int diskAnnCreateIndex( columnSqlDefs, columnSqlNames ); + zRowidColumnName = "rowid"; } rc = sqlite3_exec(db, zSql, 0, 0, 0); sqlite3DbFree(db, zSql); + if( rc != SQLITE_OK ){ + return rc; + } + /* + * vector blobs are usually pretty huge (more than a page size, for example, node block for 1024d f32 embeddings with 1bit compression will occupy ~20KB) + * in this case, main table B-Tree takes on redundant shape where all leaf nodes has only 1 cell + * + * as we have a query which selects random row using OFFSET/LIMIT trick - we will need to read all these leaf nodes pages just to skip them + * so, in order to remove this overhead for random row selection - we creating an index with just single column used + * in this case B-Tree leafs will be full of rowids and the overhead for page reads will be very small + */ + zSql = sqlite3MPrintf( + db, + "CREATE INDEX IF NOT EXISTS \"%w\".%s_shadow_idx ON %s_shadow (%s)", + zDbSName, + zIdxName, + zIdxName, + zRowidColumnName + ); + rc = sqlite3_exec(db, zSql, 0, 0, 0); + sqlite3DbFree(db, zSql); return rc; } @@ -212110,8 +212134,8 @@ static int diskAnnSelectRandomShadowRow(const DiskAnnIndex *pIndex, u64 *pRowid) zSql = sqlite3MPrintf( pIndex->db, - "SELECT rowid FROM \"%w\".%s LIMIT 1 OFFSET ABS(RANDOM()) %% MAX((SELECT COUNT(*) FROM %s), 1)", - pIndex->zDbSName, pIndex->zShadow, pIndex->zShadow + "SELECT rowid FROM \"%w\".%s LIMIT 1 OFFSET ABS(RANDOM()) %% MAX((SELECT COUNT(*) FROM \"%w\".%s), 1)", + pIndex->zDbSName, pIndex->zShadow, pIndex->zDbSName, pIndex->zShadow ); if( zSql == NULL ){ rc = SQLITE_NOMEM_BKPT; diff --git a/libsql-ffi/bundled/src/sqlite3.c b/libsql-ffi/bundled/src/sqlite3.c index d7587cc38b..15d09606fb 100644 --- 
a/libsql-ffi/bundled/src/sqlite3.c +++ b/libsql-ffi/bundled/src/sqlite3.c @@ -212002,6 +212002,7 @@ int diskAnnCreateIndex( int type, dims; u64 maxNeighborsParam, blockSizeBytes; char *zSql; + const char *zRowidColumnName; char columnSqlDefs[VECTOR_INDEX_SQL_RENDER_LIMIT]; // definition of columns (e.g. index_key INTEGER BINARY, index_key1 TEXT, ...) char columnSqlNames[VECTOR_INDEX_SQL_RENDER_LIMIT]; // just column names (e.g. index_key, index_key1, index_key2, ...) if( vectorIdxKeyDefsRender(pKey, "index_key", columnSqlDefs, sizeof(columnSqlDefs)) != 0 ){ @@ -212069,6 +212070,7 @@ int diskAnnCreateIndex( columnSqlDefs, columnSqlNames ); + zRowidColumnName = "index_key"; }else{ zSql = sqlite3MPrintf( db, @@ -212078,9 +212080,31 @@ int diskAnnCreateIndex( columnSqlDefs, columnSqlNames ); + zRowidColumnName = "rowid"; } rc = sqlite3_exec(db, zSql, 0, 0, 0); sqlite3DbFree(db, zSql); + if( rc != SQLITE_OK ){ + return rc; + } + /* + * vector blobs are usually pretty huge (more than a page size, for example, node block for 1024d f32 embeddings with 1bit compression will occupy ~20KB) + * in this case, main table B-Tree takes on redundant shape where all leaf nodes has only 1 cell + * + * as we have a query which selects random row using OFFSET/LIMIT trick - we will need to read all these leaf nodes pages just to skip them + * so, in order to remove this overhead for random row selection - we creating an index with just single column used + * in this case B-Tree leafs will be full of rowids and the overhead for page reads will be very small + */ + zSql = sqlite3MPrintf( + db, + "CREATE INDEX IF NOT EXISTS \"%w\".%s_shadow_idx ON %s_shadow (%s)", + zDbSName, + zIdxName, + zIdxName, + zRowidColumnName + ); + rc = sqlite3_exec(db, zSql, 0, 0, 0); + sqlite3DbFree(db, zSql); return rc; } @@ -212110,8 +212134,8 @@ static int diskAnnSelectRandomShadowRow(const DiskAnnIndex *pIndex, u64 *pRowid) zSql = sqlite3MPrintf( pIndex->db, - "SELECT rowid FROM \"%w\".%s LIMIT 1 OFFSET 
ABS(RANDOM()) %% MAX((SELECT COUNT(*) FROM %s), 1)", - pIndex->zDbSName, pIndex->zShadow, pIndex->zShadow + "SELECT rowid FROM \"%w\".%s LIMIT 1 OFFSET ABS(RANDOM()) %% MAX((SELECT COUNT(*) FROM \"%w\".%s), 1)", + pIndex->zDbSName, pIndex->zShadow, pIndex->zDbSName, pIndex->zShadow ); if( zSql == NULL ){ rc = SQLITE_NOMEM_BKPT; From f80444ac450d19754bf11de98753be6f43ca8332 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Thu, 8 Aug 2024 14:50:29 +0400 Subject: [PATCH 37/50] fix test --- libsql-sqlite3/test/libsql_vector_index.test | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libsql-sqlite3/test/libsql_vector_index.test b/libsql-sqlite3/test/libsql_vector_index.test index 7308b2d93f..c1a270e4da 100644 --- a/libsql-sqlite3/test/libsql_vector_index.test +++ b/libsql-sqlite3/test/libsql_vector_index.test @@ -140,7 +140,7 @@ do_execsql_test vector-sql { INSERT INTO t_sql VALUES(vector('[1,2,3]')), (vector('[2,3,4]')); SELECT sql FROM sqlite_master WHERE name LIKE '%t_sql%'; SELECT name FROM libsql_vector_meta_shadow WHERE name = 't_sql_idx'; -} {{CREATE TABLE t_sql( v FLOAT32(3))} {CREATE TABLE t_sql_idx_shadow (index_key INTEGER , data BLOB, PRIMARY KEY (index_key))} {CREATE INDEX t_sql_idx ON t_sql( libsql_vector_idx(v) )} {t_sql_idx}} +} {{CREATE TABLE t_sql( v FLOAT32(3))} {CREATE TABLE t_sql_idx_shadow (index_key INTEGER , data BLOB, PRIMARY KEY (index_key))} {CREATE INDEX t_sql_idx_shadow_idx ON t_sql_idx_shadow (index_key)} {CREATE INDEX t_sql_idx ON t_sql( libsql_vector_idx(v) )} {t_sql_idx}} do_execsql_test vector-drop-index { CREATE TABLE t_index_drop( v FLOAT32(3)); From 8441108e713d2aa67606cf91385fe82c066045d7 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Thu, 8 Aug 2024 18:31:38 +0400 Subject: [PATCH 38/50] allow vector index to be partial --- libsql-sqlite3/src/vectorIndex.c | 5 --- libsql-sqlite3/test/libsql_vector_index.test | 33 ++++++++++++++++++-- 2 files changed, 30 insertions(+), 8 deletions(-) diff --git 
a/libsql-sqlite3/src/vectorIndex.c b/libsql-sqlite3/src/vectorIndex.c index d8b3497781..001a1aae10 100644 --- a/libsql-sqlite3/src/vectorIndex.c +++ b/libsql-sqlite3/src/vectorIndex.c @@ -862,11 +862,6 @@ int vectorIndexCreate(Parse *pParse, const Index *pIdx, const char *zDbSName, co sqlite3ErrorMsg(pParse, "vector index: must contain exactly one column wrapped into the " VECTOR_INDEX_MARKER_FUNCTION " function"); return CREATE_FAIL; } - // we are able to support this but I doubt this works for now - more polishing required to make this work - if( pIdx->pPartIdxWhere != NULL ) { - sqlite3ErrorMsg(pParse, "vector index: where condition is forbidden"); - return CREATE_FAIL; - } pArgsList = pIdx->aColExpr->a[0].pExpr->x.pList; pListItem = pArgsList->a; diff --git a/libsql-sqlite3/test/libsql_vector_index.test b/libsql-sqlite3/test/libsql_vector_index.test index c1a270e4da..a173c773d3 100644 --- a/libsql-sqlite3/test/libsql_vector_index.test +++ b/libsql-sqlite3/test/libsql_vector_index.test @@ -275,6 +275,36 @@ do_execsql_test vector-all-params { SELECT * FROM vector_top_k('t_all_params_idx', vector('[1,2]'), 2); } {1 2} +do_execsql_test vector-partial { + CREATE TABLE t_partial( name TEXT, type INT, v FLOAT32(3)); + INSERT INTO t_partial VALUES ( 'a', 0, vector('[1,2,3]') ); + INSERT INTO t_partial VALUES ( 'b', 1, vector('[3,4,5]') ); + INSERT INTO t_partial VALUES ( 'c', 2, vector('[4,5,6]') ); + INSERT INTO t_partial VALUES ( 'd', 0, vector('[5,6,7]') ); + INSERT INTO t_partial VALUES ( 'e', 1, vector('[6,7,8]') ); + INSERT INTO t_partial VALUES ( 'f', 2, vector('[7,8,9]') ); + CREATE INDEX t_partial_idx_0 ON t_partial( libsql_vector_idx(v) ) WHERE type = 0; + CREATE INDEX t_partial_idx_1 ON t_partial( libsql_vector_idx(v) ) WHERE type = 1; + CREATE INDEX t_partial_idx_not_0 ON t_partial( libsql_vector_idx(v) ) WHERE type != 0; + SELECT id FROM vector_top_k('t_partial_idx_0', vector('[1,2,3]'), 10); + SELECT id FROM vector_top_k('t_partial_idx_1', 
vector('[1,2,3]'), 10); + SELECT id FROM vector_top_k('t_partial_idx_not_0', vector('[1,2,3]'), 10); + INSERT INTO t_partial VALUES ( 'g', 0, vector('[8,9,10]') ); + INSERT INTO t_partial VALUES ( 'h', 1, vector('[9,10,11]') ); + INSERT INTO t_partial VALUES ( 'i', 2, vector('[10,11,12]') ); + SELECT id FROM vector_top_k('t_partial_idx_0', vector('[1,2,3]'), 10); + SELECT id FROM vector_top_k('t_partial_idx_1', vector('[1,2,3]'), 10); + SELECT id FROM vector_top_k('t_partial_idx_not_0', vector('[1,2,3]'), 10); +} { + 1 4 + 2 5 + 2 3 5 6 + + 1 4 7 + 2 5 8 + 2 3 5 6 8 9 +} + proc error_messages {sql} { set ret "" catch { @@ -309,8 +339,6 @@ do_test vector-errors { sqlite3_exec db { CREATE TABLE t_mixed_t( v FLOAT32(3)); } sqlite3_exec db { INSERT INTO t_mixed_t VALUES('[1]'); } lappend ret [error_messages {CREATE INDEX t_mixed_t_idx ON t_mixed_t( libsql_vector_idx(v) )}] - sqlite3_exec db { CREATE TABLE t_partial( name TEXT, type INT, v FLOAT32(3)); } - lappend ret [error_messages {CREATE INDEX t_partial_idx ON t_partial( libsql_vector_idx(v) ) WHERE type = 0}] } [list {*}{ {no such table: main.t_no} {no such column: v} @@ -328,5 +356,4 @@ do_test vector-errors { {vector index(insert): only f32 vectors are supported} {vector index(search): dimensions are different: 2 != 4} {vector index(insert): dimensions are different: 1 != 3} - {vector index: where condition is forbidden} }] From ec996fabf462d841b9256f032c102db7d2a95e79 Mon Sep 17 00:00:00 2001 From: Nikita Sivukhin Date: Thu, 8 Aug 2024 19:40:55 +0400 Subject: [PATCH 39/50] build bundles --- libsql-ffi/bundled/SQLite3MultipleCiphers/src/sqlite3.c | 5 ----- libsql-ffi/bundled/src/sqlite3.c | 5 ----- 2 files changed, 10 deletions(-) diff --git a/libsql-ffi/bundled/SQLite3MultipleCiphers/src/sqlite3.c b/libsql-ffi/bundled/SQLite3MultipleCiphers/src/sqlite3.c index 15d09606fb..3a76f9cff3 100644 --- a/libsql-ffi/bundled/SQLite3MultipleCiphers/src/sqlite3.c +++ b/libsql-ffi/bundled/SQLite3MultipleCiphers/src/sqlite3.c 
@@ -214523,11 +214523,6 @@ int vectorIndexCreate(Parse *pParse, const Index *pIdx, const char *zDbSName, co sqlite3ErrorMsg(pParse, "vector index: must contain exactly one column wrapped into the " VECTOR_INDEX_MARKER_FUNCTION " function"); return CREATE_FAIL; } - // we are able to support this but I doubt this works for now - more polishing required to make this work - if( pIdx->pPartIdxWhere != NULL ) { - sqlite3ErrorMsg(pParse, "vector index: where condition is forbidden"); - return CREATE_FAIL; - } pArgsList = pIdx->aColExpr->a[0].pExpr->x.pList; pListItem = pArgsList->a; diff --git a/libsql-ffi/bundled/src/sqlite3.c b/libsql-ffi/bundled/src/sqlite3.c index 15d09606fb..3a76f9cff3 100644 --- a/libsql-ffi/bundled/src/sqlite3.c +++ b/libsql-ffi/bundled/src/sqlite3.c @@ -214523,11 +214523,6 @@ int vectorIndexCreate(Parse *pParse, const Index *pIdx, const char *zDbSName, co sqlite3ErrorMsg(pParse, "vector index: must contain exactly one column wrapped into the " VECTOR_INDEX_MARKER_FUNCTION " function"); return CREATE_FAIL; } - // we are able to support this but I doubt this works for now - more polishing required to make this work - if( pIdx->pPartIdxWhere != NULL ) { - sqlite3ErrorMsg(pParse, "vector index: where condition is forbidden"); - return CREATE_FAIL; - } pArgsList = pIdx->aColExpr->a[0].pExpr->x.pList; pListItem = pArgsList->a; From 7c4ea18c75598e7cd2b84d2940d9c6a0f380d571 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Thu, 8 Aug 2024 10:22:19 +0200 Subject: [PATCH 40/50] abstract replicator injector and introduce SqliteInjector --- bottomless/src/replicator.rs | 9 +- libsql-replication/src/injector/error.rs | 2 + libsql-replication/src/injector/mod.rs | 298 +-------------- .../injector/{ => sqlite_injector}/headers.rs | 0 .../{ => sqlite_injector}/injector_wal.rs | 0 .../src/injector/sqlite_injector/mod.rs | 345 ++++++++++++++++++ libsql-replication/src/replicator.rs | 95 ++--- libsql/src/replication/mod.rs | 7 +- 8 files changed, 418 insertions(+), 338 
deletions(-) rename libsql-replication/src/injector/{ => sqlite_injector}/headers.rs (100%) rename libsql-replication/src/injector/{ => sqlite_injector}/injector_wal.rs (100%) create mode 100644 libsql-replication/src/injector/sqlite_injector/mod.rs diff --git a/bottomless/src/replicator.rs b/bottomless/src/replicator.rs index f2ef812f75..26e190df66 100644 --- a/bottomless/src/replicator.rs +++ b/bottomless/src/replicator.rs @@ -17,6 +17,7 @@ use aws_sdk_s3::primitives::ByteStream; use aws_sdk_s3::{Client, Config}; use bytes::{Buf, Bytes}; use chrono::{DateTime, NaiveDateTime, TimeZone, Utc}; +use libsql_replication::injector::Injector as _; use libsql_sys::{Cipher, EncryptionConfig}; use std::ops::Deref; use std::path::{Path, PathBuf}; @@ -1449,12 +1450,12 @@ impl Replicator { db_path: &Path, ) -> Result { let encryption_config = self.encryption_config.clone(); - let mut injector = libsql_replication::injector::Injector::new( - db_path, + let mut injector = libsql_replication::injector::SqliteInjector::new( + db_path.to_path_buf(), 4096, libsql_sys::connection::NO_AUTOCHECKPOINT, encryption_config, - )?; + ).await?; let prefix = format!("{}-{}/", self.db_name, generation); let mut page_buf = { let mut v = Vec::with_capacity(page_size); @@ -1552,7 +1553,7 @@ impl Replicator { }, page_buf.as_slice(), ); - injector.inject_frame(frame_to_inject)?; + injector.inject_frame(frame_to_inject).await?; applied_wal_frame = true; } } diff --git a/libsql-replication/src/injector/error.rs b/libsql-replication/src/injector/error.rs index 14899089ea..b1cebfe28b 100644 --- a/libsql-replication/src/injector/error.rs +++ b/libsql-replication/src/injector/error.rs @@ -1,3 +1,5 @@ +pub type Result = std::result::Result; + #[derive(Debug, thiserror::Error)] pub enum Error { #[error("IO error: {0}")] diff --git a/libsql-replication/src/injector/mod.rs b/libsql-replication/src/injector/mod.rs index 80443964fe..1d69ae0aab 100644 --- a/libsql-replication/src/injector/mod.rs +++ 
b/libsql-replication/src/injector/mod.rs @@ -1,299 +1,27 @@ -use std::path::Path; -use std::sync::Arc; -use std::{collections::VecDeque, path::PathBuf}; +use std::future::Future; -use parking_lot::Mutex; -use rusqlite::OpenFlags; +pub use sqlite_injector::SqliteInjector; use crate::frame::{Frame, FrameNo}; +use error::Result; pub use error::Error; -use self::injector_wal::{ - InjectorWal, InjectorWalManager, LIBSQL_INJECT_FATAL, LIBSQL_INJECT_OK, LIBSQL_INJECT_OK_TXN, -}; - mod error; -mod headers; -mod injector_wal; - -#[derive(Debug)] -pub enum InjectError {} - -pub type FrameBuffer = Arc>>; - -pub struct Injector { - /// The injector is in a transaction state - is_txn: bool, - /// Buffer for holding current transaction frames - buffer: FrameBuffer, - /// Maximum capacity of the frame buffer - capacity: usize, - /// Injector connection - // connection must be dropped before the hook context - connection: Arc>>, - biggest_uncommitted_seen: FrameNo, - - // Connection config items used to recreate the injection connection - path: PathBuf, - encryption_config: Option, - auto_checkpoint: u32, -} - -/// Methods from this trait are called before and after performing a frame injection. -/// This trait trait is used to record the last committed frame_no to the log. -/// The implementer can persist the pre and post commit frame no, and compare them in the event of -/// a crash; if the pre and post commit frame_no don't match, then the log may be corrupted. 
-impl Injector { - pub fn new( - path: impl AsRef, - capacity: usize, - auto_checkpoint: u32, - encryption_config: Option, - ) -> Result { - let path = path.as_ref().to_path_buf(); - - let buffer = FrameBuffer::default(); - let wal_manager = InjectorWalManager::new(buffer.clone()); - let connection = libsql_sys::Connection::open( - &path, - OpenFlags::SQLITE_OPEN_READ_WRITE - | OpenFlags::SQLITE_OPEN_CREATE - | OpenFlags::SQLITE_OPEN_URI - | OpenFlags::SQLITE_OPEN_NO_MUTEX, - wal_manager, - auto_checkpoint, - encryption_config.clone(), - )?; - - Ok(Self { - is_txn: false, - buffer, - capacity, - connection: Arc::new(Mutex::new(connection)), - biggest_uncommitted_seen: 0, - - path, - encryption_config, - auto_checkpoint, - }) - } - - /// Inject a frame into the log. If this was a commit frame, returns Ok(Some(FrameNo)). - pub fn inject_frame(&mut self, frame: Frame) -> Result, Error> { - let frame_close_txn = frame.header().size_after.get() != 0; - self.buffer.lock().push_back(frame); - if frame_close_txn || self.buffer.lock().len() >= self.capacity { - return self.flush(); - } +mod sqlite_injector; - Ok(None) - } +pub trait Injector { + /// Inject a singular frame. + fn inject_frame( + &mut self, + frame: Frame, + ) -> impl Future>> + Send; - pub fn rollback(&mut self) { - let conn = self.connection.lock(); - let mut rollback = conn.prepare_cached("ROLLBACK").unwrap(); - let _ = rollback.execute(()); - self.is_txn = false; - } + /// Discard any uncommitted frames. + fn rollback(&mut self) -> impl Future + Send; /// Flush the buffer to libsql WAL. /// Trigger a dummy write, and flush the cache to trigger a call to xFrame. The buffer's frame /// are then injected into the wal.
- pub fn flush(&mut self) -> Result, Error> { - match self.try_flush() { - Err(e) => { - // something went wrong, rollback the connection to make sure we can retry in a - // clean state - self.biggest_uncommitted_seen = 0; - self.rollback(); - Err(e) - } - Ok(ret) => Ok(ret), - } - } - - fn try_flush(&mut self) -> Result, Error> { - if !self.is_txn { - self.begin_txn()?; - } - - let lock = self.buffer.lock(); - // the frames in the buffer are either monotonically increasing (log) or decreasing - // (snapshot). Either way, we want to find the biggest frameno we're about to commit, and - // that is either the front or the back of the buffer - let last_frame_no = match lock.back().zip(lock.front()) { - Some((b, f)) => f.header().frame_no.get().max(b.header().frame_no.get()), - None => { - tracing::trace!("nothing to inject"); - return Ok(None); - } - }; - - self.biggest_uncommitted_seen = self.biggest_uncommitted_seen.max(last_frame_no); - - drop(lock); - - let connection = self.connection.lock(); - // use prepare cached to avoid parsing the same statement over and over again. - let mut stmt = - connection.prepare_cached("INSERT INTO libsql_temp_injection VALUES (42)")?; - - // We execute the statement, and then force a call to xframe if necesacary. If the execute - // succeeds, then xframe wasn't called, in this case, we call cache_flush, and then process - // the error. - // It is unexpected that execute flushes, but it is possible, so we handle that case. 
- match stmt.execute(()).and_then(|_| connection.cache_flush()) { - Ok(_) => panic!("replication hook was not called"), - Err(e) => { - if let Some(e) = e.sqlite_error() { - if e.extended_code == LIBSQL_INJECT_OK { - // refresh schema - connection.pragma_update(None, "writable_schema", "reset")?; - let mut rollback = connection.prepare_cached("ROLLBACK")?; - let _ = rollback.execute(()); - self.is_txn = false; - assert!(self.buffer.lock().is_empty()); - let commit_frame_no = self.biggest_uncommitted_seen; - self.biggest_uncommitted_seen = 0; - return Ok(Some(commit_frame_no)); - } else if e.extended_code == LIBSQL_INJECT_OK_TXN { - self.is_txn = true; - assert!(self.buffer.lock().is_empty()); - return Ok(None); - } else if e.extended_code == LIBSQL_INJECT_FATAL { - return Err(Error::FatalInjectError); - } - } - - Err(Error::FatalInjectError) - } - } - } - - fn begin_txn(&mut self) -> Result<(), Error> { - let mut conn = self.connection.lock(); - - { - let wal_manager = InjectorWalManager::new(self.buffer.clone()); - let new_conn = libsql_sys::Connection::open( - &self.path, - OpenFlags::SQLITE_OPEN_READ_WRITE - | OpenFlags::SQLITE_OPEN_CREATE - | OpenFlags::SQLITE_OPEN_URI - | OpenFlags::SQLITE_OPEN_NO_MUTEX, - wal_manager, - self.auto_checkpoint, - self.encryption_config.clone(), - )?; - - let _ = std::mem::replace(&mut *conn, new_conn); - } - - conn.pragma_update(None, "writable_schema", "true")?; - - let mut stmt = conn.prepare_cached("BEGIN IMMEDIATE")?; - stmt.execute(())?; - // we create a dummy table. This table MUST not be persisted, otherwise the replica schema - // would differ with the primary's. 
- let mut stmt = - conn.prepare_cached("CREATE TABLE IF NOT EXISTS libsql_temp_injection (x)")?; - stmt.execute(())?; - - Ok(()) - } - - pub fn clear_buffer(&mut self) { - self.buffer.lock().clear() - } - - #[cfg(test)] - pub fn is_txn(&self) -> bool { - self.is_txn - } -} - -#[cfg(test)] -mod test { - use crate::frame::FrameBorrowed; - use std::mem::size_of; - - use super::*; - /// this this is generated by creating a table test, inserting 5 rows into it, and then - /// truncating the wal file of it's header. - const WAL: &[u8] = include_bytes!("../../assets/test/test_wallog"); - - fn wal_log() -> impl Iterator { - WAL.chunks(size_of::()) - .map(|b| Frame::try_from(b).unwrap()) - } - - #[test] - fn test_simple_inject_frames() { - let temp = tempfile::tempdir().unwrap(); - - let mut injector = Injector::new(temp.path().join("data"), 10, 10000, None).unwrap(); - let log = wal_log(); - for frame in log { - injector.inject_frame(frame).unwrap(); - } - - let conn = rusqlite::Connection::open(temp.path().join("data")).unwrap(); - - conn.query_row("SELECT COUNT(*) FROM test", (), |row| { - assert_eq!(row.get::<_, usize>(0).unwrap(), 5); - Ok(()) - }) - .unwrap(); - } - - #[test] - fn test_inject_frames_split_txn() { - let temp = tempfile::tempdir().unwrap(); - - // inject one frame at a time - let mut injector = Injector::new(temp.path().join("data"), 1, 10000, None).unwrap(); - let log = wal_log(); - for frame in log { - injector.inject_frame(frame).unwrap(); - } - - let conn = rusqlite::Connection::open(temp.path().join("data")).unwrap(); - - conn.query_row("SELECT COUNT(*) FROM test", (), |row| { - assert_eq!(row.get::<_, usize>(0).unwrap(), 5); - Ok(()) - }) - .unwrap(); - } - - #[test] - fn test_inject_partial_txn_isolated() { - let temp = tempfile::tempdir().unwrap(); - - // inject one frame at a time - let mut injector = Injector::new(temp.path().join("data"), 10, 1000, None).unwrap(); - let mut frames = wal_log(); - - assert!(injector - 
.inject_frame(frames.next().unwrap()) - .unwrap() - .is_none()); - let conn = rusqlite::Connection::open(temp.path().join("data")).unwrap(); - assert!(conn - .query_row("SELECT COUNT(*) FROM test", (), |_| Ok(())) - .is_err()); - - while injector - .inject_frame(frames.next().unwrap()) - .unwrap() - .is_none() - {} - - // reset schema - conn.pragma_update(None, "writable_schema", "reset") - .unwrap(); - conn.query_row("SELECT COUNT(*) FROM test", (), |_| Ok(())) - .unwrap(); - } + fn flush(&mut self) -> impl Future>> + Send; } diff --git a/libsql-replication/src/injector/headers.rs b/libsql-replication/src/injector/sqlite_injector/headers.rs similarity index 100% rename from libsql-replication/src/injector/headers.rs rename to libsql-replication/src/injector/sqlite_injector/headers.rs diff --git a/libsql-replication/src/injector/injector_wal.rs b/libsql-replication/src/injector/sqlite_injector/injector_wal.rs similarity index 100% rename from libsql-replication/src/injector/injector_wal.rs rename to libsql-replication/src/injector/sqlite_injector/injector_wal.rs diff --git a/libsql-replication/src/injector/sqlite_injector/mod.rs b/libsql-replication/src/injector/sqlite_injector/mod.rs new file mode 100644 index 0000000000..dea78ce4b5 --- /dev/null +++ b/libsql-replication/src/injector/sqlite_injector/mod.rs @@ -0,0 +1,345 @@ +use std::path::Path; +use std::sync::Arc; +use std::{collections::VecDeque, path::PathBuf}; + +use parking_lot::Mutex; +use rusqlite::OpenFlags; +use tokio::task::spawn_blocking; + +use crate::frame::{Frame, FrameNo}; + +use self::injector_wal::{ + InjectorWal, InjectorWalManager, LIBSQL_INJECT_FATAL, LIBSQL_INJECT_OK, LIBSQL_INJECT_OK_TXN, +}; + +use super::error::Result; +use super::{Error, Injector}; + +mod headers; +mod injector_wal; + +pub type FrameBuffer = Arc>>; + +pub struct SqliteInjector { + pub(in super::super) inner: Arc>, +} + +impl Injector for SqliteInjector { + async fn inject_frame( + &mut self, + frame: Frame, + ) -> Result> 
{ + let inner = self.inner.clone(); + spawn_blocking(move || { + inner.lock().inject_frame(frame) + }).await.unwrap() + } + + async fn rollback(&mut self) { + let inner = self.inner.clone(); + spawn_blocking(move || { + inner.lock().rollback() + }).await.unwrap(); + } + + async fn flush(&mut self) -> Result> { + let inner = self.inner.clone(); + spawn_blocking(move || { + inner.lock().flush() + }).await.unwrap() + } +} + +impl SqliteInjector { + pub async fn new( + path: PathBuf, + capacity: usize, + auto_checkpoint: u32, + encryption_config: Option, + ) -> super::Result { + let inner = spawn_blocking(move || { + SqliteInjectorInner::new(path, capacity, auto_checkpoint, encryption_config) + }).await.unwrap()?; + + Ok(Self { + inner: Arc::new(Mutex::new(inner)) + }) + } +} + +pub(in super::super) struct SqliteInjectorInner { + /// The injector is in a transaction state + is_txn: bool, + /// Buffer for holding current transaction frames + buffer: FrameBuffer, + /// Maximum capacity of the frame buffer + capacity: usize, + /// Injector connection + // connection must be dropped before the hook context + connection: Arc>>, + biggest_uncommitted_seen: FrameNo, + + // Connection config items used to recreate the injection connection + path: PathBuf, + encryption_config: Option, + auto_checkpoint: u32, +} + +/// Methods from this trait are called before and after performing a frame injection. +/// This trait is used to record the last committed frame_no to the log. +/// The implementer can persist the pre and post commit frame no, and compare them in the event of +/// a crash; if the pre and post commit frame_no don't match, then the log may be corrupted.
+impl SqliteInjectorInner { + fn new( + path: impl AsRef, + capacity: usize, + auto_checkpoint: u32, + encryption_config: Option, + ) -> Result { + let path = path.as_ref().to_path_buf(); + + let buffer = FrameBuffer::default(); + let wal_manager = InjectorWalManager::new(buffer.clone()); + let connection = libsql_sys::Connection::open( + &path, + OpenFlags::SQLITE_OPEN_READ_WRITE + | OpenFlags::SQLITE_OPEN_CREATE + | OpenFlags::SQLITE_OPEN_URI + | OpenFlags::SQLITE_OPEN_NO_MUTEX, + wal_manager, + auto_checkpoint, + encryption_config.clone(), + )?; + + Ok(Self { + is_txn: false, + buffer, + capacity, + connection: Arc::new(Mutex::new(connection)), + biggest_uncommitted_seen: 0, + + path, + encryption_config, + auto_checkpoint, + }) + } + + /// Inject a frame into the log. If this was a commit frame, returns Ok(Some(FrameNo)). + pub fn inject_frame(&mut self, frame: Frame) -> Result, Error> { + let frame_close_txn = frame.header().size_after.get() != 0; + self.buffer.lock().push_back(frame); + if frame_close_txn || self.buffer.lock().len() >= self.capacity { + return self.flush(); + } + + Ok(None) + } + + pub fn rollback(&mut self) { + self.clear_buffer(); + let conn = self.connection.lock(); + let mut rollback = conn.prepare_cached("ROLLBACK").unwrap(); + let _ = rollback.execute(()); + self.is_txn = false; + } + + /// Flush the buffer to libsql WAL. + /// Trigger a dummy write, and flush the cache to trigger a call to xFrame. The buffer's frame + /// are then injected into the wal. 
+ pub fn flush(&mut self) -> Result, Error> { + match self.try_flush() { + Err(e) => { + // something went wrong, rollback the connection to make sure we can retry in a + // clean state + self.biggest_uncommitted_seen = 0; + self.rollback(); + Err(e) + } + Ok(ret) => Ok(ret), + } + } + + fn try_flush(&mut self) -> Result, Error> { + if !self.is_txn { + self.begin_txn()?; + } + + let lock = self.buffer.lock(); + // the frames in the buffer are either monotonically increasing (log) or decreasing + // (snapshot). Either way, we want to find the biggest frameno we're about to commit, and + // that is either the front or the back of the buffer + let last_frame_no = match lock.back().zip(lock.front()) { + Some((b, f)) => f.header().frame_no.get().max(b.header().frame_no.get()), + None => { + tracing::trace!("nothing to inject"); + return Ok(None); + } + }; + + self.biggest_uncommitted_seen = self.biggest_uncommitted_seen.max(last_frame_no); + + drop(lock); + + let connection = self.connection.lock(); + // use prepare cached to avoid parsing the same statement over and over again. + let mut stmt = + connection.prepare_cached("INSERT INTO libsql_temp_injection VALUES (42)")?; + + // We execute the statement, and then force a call to xframe if necessary. If the execute + // succeeds, then xframe wasn't called, in this case, we call cache_flush, and then process + // the error. + // It is unexpected that execute flushes, but it is possible, so we handle that case.
+ match stmt.execute(()).and_then(|_| connection.cache_flush()) { + Ok(_) => panic!("replication hook was not called"), + Err(e) => { + if let Some(e) = e.sqlite_error() { + if e.extended_code == LIBSQL_INJECT_OK { + // refresh schema + connection.pragma_update(None, "writable_schema", "reset")?; + let mut rollback = connection.prepare_cached("ROLLBACK")?; + let _ = rollback.execute(()); + self.is_txn = false; + assert!(self.buffer.lock().is_empty()); + let commit_frame_no = self.biggest_uncommitted_seen; + self.biggest_uncommitted_seen = 0; + return Ok(Some(commit_frame_no)); + } else if e.extended_code == LIBSQL_INJECT_OK_TXN { + self.is_txn = true; + assert!(self.buffer.lock().is_empty()); + return Ok(None); + } else if e.extended_code == LIBSQL_INJECT_FATAL { + return Err(Error::FatalInjectError); + } + } + + Err(Error::FatalInjectError) + } + } + } + + fn begin_txn(&mut self) -> Result<(), Error> { + let mut conn = self.connection.lock(); + + { + let wal_manager = InjectorWalManager::new(self.buffer.clone()); + let new_conn = libsql_sys::Connection::open( + &self.path, + OpenFlags::SQLITE_OPEN_READ_WRITE + | OpenFlags::SQLITE_OPEN_CREATE + | OpenFlags::SQLITE_OPEN_URI + | OpenFlags::SQLITE_OPEN_NO_MUTEX, + wal_manager, + self.auto_checkpoint, + self.encryption_config.clone(), + )?; + + let _ = std::mem::replace(&mut *conn, new_conn); + } + + conn.pragma_update(None, "writable_schema", "true")?; + + let mut stmt = conn.prepare_cached("BEGIN IMMEDIATE")?; + stmt.execute(())?; + // we create a dummy table. This table MUST not be persisted, otherwise the replica schema + // would differ with the primary's. 
+ let mut stmt = + conn.prepare_cached("CREATE TABLE IF NOT EXISTS libsql_temp_injection (x)")?; + stmt.execute(())?; + + Ok(()) + } + + pub fn clear_buffer(&mut self) { + self.buffer.lock().clear() + } + + #[cfg(test)] + pub fn is_txn(&self) -> bool { + self.is_txn + } +} + +#[cfg(test)] +mod test { + use crate::frame::FrameBorrowed; + use std::mem::size_of; + + use super::*; + /// this is generated by creating a table test, inserting 5 rows into it, and then + /// truncating the wal file of its header. + const WAL: &[u8] = include_bytes!("../../../assets/test/test_wallog"); + + fn wal_log() -> impl Iterator { + WAL.chunks(size_of::()) + .map(|b| Frame::try_from(b).unwrap()) + } + + #[test] + fn test_simple_inject_frames() { + let temp = tempfile::tempdir().unwrap(); + + let mut injector = SqliteInjectorInner::new(temp.path().join("data"), 10, 10000, None).unwrap(); + let log = wal_log(); + for frame in log { + injector.inject_frame(frame).unwrap(); + } + + let conn = rusqlite::Connection::open(temp.path().join("data")).unwrap(); + + conn.query_row("SELECT COUNT(*) FROM test", (), |row| { + assert_eq!(row.get::<_, usize>(0).unwrap(), 5); + Ok(()) + }) + .unwrap(); + } + + #[test] + fn test_inject_frames_split_txn() { + let temp = tempfile::tempdir().unwrap(); + + // inject one frame at a time + let mut injector = SqliteInjectorInner::new(temp.path().join("data"), 1, 10000, None).unwrap(); + let log = wal_log(); + for frame in log { + injector.inject_frame(frame).unwrap(); + } + + let conn = rusqlite::Connection::open(temp.path().join("data")).unwrap(); + + conn.query_row("SELECT COUNT(*) FROM test", (), |row| { + assert_eq!(row.get::<_, usize>(0).unwrap(), 5); + Ok(()) + }) + .unwrap(); + } + + #[test] + fn test_inject_partial_txn_isolated() { + let temp = tempfile::tempdir().unwrap(); + + // inject one frame at a time + let mut injector = SqliteInjectorInner::new(temp.path().join("data"), 10, 1000, None).unwrap(); + let mut frames = wal_log(); + 
assert!(injector + .inject_frame(frames.next().unwrap()) + .unwrap() + .is_none()); + let conn = rusqlite::Connection::open(temp.path().join("data")).unwrap(); + assert!(conn + .query_row("SELECT COUNT(*) FROM test", (), |_| Ok(())) + .is_err()); + + while injector + .inject_frame(frames.next().unwrap()) + .unwrap() + .is_none() + {} + + // reset schema + conn.pragma_update(None, "writable_schema", "reset") + .unwrap(); + conn.query_row("SELECT COUNT(*) FROM test", (), |_| Ok(())) + .unwrap(); + } +} diff --git a/libsql-replication/src/replicator.rs b/libsql-replication/src/replicator.rs index bc1eada7f8..31c766faad 100644 --- a/libsql-replication/src/replicator.rs +++ b/libsql-replication/src/replicator.rs @@ -1,14 +1,11 @@ use std::path::PathBuf; -use std::sync::Arc; -use parking_lot::Mutex; -use tokio::task::spawn_blocking; use tokio::time::Duration; use tokio_stream::{Stream, StreamExt}; use tonic::{Code, Status}; use crate::frame::{Frame, FrameNo}; -use crate::injector::Injector; +use crate::injector::{Injector, SqliteInjector}; use crate::rpc::replication::{ Frame as RpcFrame, NAMESPACE_DOESNT_EXIST, NEED_SNAPSHOT_ERROR_MSG, NO_HELLO_ERROR_MSG, }; @@ -137,9 +134,9 @@ where /// The `Replicator`'s duty is to download frames from the primary, and pass them to the injector at /// transaction boundaries. -pub struct Replicator { +pub struct Replicator { client: C, - injector: Arc>, + injector: I, state: ReplicatorState, frames_synced: usize, } @@ -154,33 +151,42 @@ enum ReplicatorState { Exit, } -impl Replicator { +impl Replicator +where + C: ReplicatorClient, +{ /// Creates a replicator for the db file pointed at by `db_path` - pub async fn new( + pub async fn new_sqlite( client: C, db_path: PathBuf, auto_checkpoint: u32, encryption_config: Option, ) -> Result { - let injector = { - let db_path = db_path.clone(); - spawn_blocking(move || { - Injector::new( - db_path, - INJECTOR_BUFFER_CAPACITY, - auto_checkpoint, - encryption_config, - ) - }) - .await?? 
- }; + let injector = SqliteInjector::new( + db_path.clone(), + INJECTOR_BUFFER_CAPACITY, + auto_checkpoint, + encryption_config, + ) + .await?; + + Ok(Self::new(client, injector)) + } +} - Ok(Self { +impl Replicator +where + C: ReplicatorClient, + I: Injector, +{ + + pub fn new(client: C, injector: I) -> Self { + Self { client, - injector: Arc::new(Mutex::new(injector)), + injector, state: ReplicatorState::NeedHandshake, frames_synced: 0, - }) + } } /// for a handshake on next call to replicate. @@ -250,7 +256,7 @@ impl Replicator { // in case of error we rollback the current injector transaction, and start over. if ret.is_err() { self.client.rollback(); - self.injector.lock().rollback(); + self.injector.rollback().await; } self.state = match ret { @@ -293,7 +299,8 @@ impl Replicator { } async fn load_snapshot(&mut self) -> Result<(), Error> { - self.injector.lock().clear_buffer(); + self.client.rollback(); + self.injector.rollback().await; loop { match self.client.snapshot().await { Ok(mut stream) => { @@ -315,26 +322,22 @@ impl Replicator { async fn inject_frame(&mut self, frame: Frame) -> Result<(), Error> { self.frames_synced += 1; - let injector = self.injector.clone(); - match spawn_blocking(move || injector.lock().inject_frame(frame)).await? { - Ok(Some(commit_fno)) => { + match self.injector.inject_frame(frame).await? { + Some(commit_fno) => { self.client.commit_frame_no(commit_fno).await?; } - Ok(None) => (), - Err(e) => Err(e)?, + None => (), } Ok(()) } pub async fn flush(&mut self) -> Result<(), Error> { - let injector = self.injector.clone(); - match spawn_blocking(move || injector.lock().flush()).await? { - Ok(Some(commit_fno)) => { + match self.injector.flush().await? 
{ + Some(commit_fno) => { self.client.commit_frame_no(commit_fno).await?; } - Ok(None) => (), - Err(e) => Err(e)?, + None => (), } Ok(()) @@ -395,7 +398,7 @@ mod test { fn rollback(&mut self) {} } - let mut replicator = Replicator::new(Client, tmp.path().to_path_buf(), 10000, None) + let mut replicator = Replicator::new_sqlite(Client, tmp.path().to_path_buf(), 10000, None) .await .unwrap(); @@ -438,7 +441,7 @@ mod test { fn rollback(&mut self) {} } - let mut replicator = Replicator::new(Client, tmp.path().to_path_buf(), 10000, None) + let mut replicator = Replicator::new_sqlite(Client, tmp.path().to_path_buf(), 10000, None) .await .unwrap(); // we assume that we already received the handshake and the handshake is not valid anymore @@ -482,7 +485,7 @@ mod test { fn rollback(&mut self) {} } - let mut replicator = Replicator::new(Client, tmp.path().to_path_buf(), 10000, None) + let mut replicator = Replicator::new_sqlite(Client, tmp.path().to_path_buf(), 10000, None) .await .unwrap(); // we assume that we already received the handshake and the handshake is not valid anymore @@ -526,7 +529,7 @@ mod test { fn rollback(&mut self) {} } - let mut replicator = Replicator::new(Client, tmp.path().to_path_buf(), 10000, None) + let mut replicator = Replicator::new_sqlite(Client, tmp.path().to_path_buf(), 10000, None) .await .unwrap(); // we assume that we already received the handshake and the handshake is not valid anymore @@ -568,7 +571,7 @@ mod test { fn rollback(&mut self) {} } - let mut replicator = Replicator::new(Client, tmp.path().to_path_buf(), 10000, None) + let mut replicator = Replicator::new_sqlite(Client, tmp.path().to_path_buf(), 10000, None) .await .unwrap(); // we assume that we already received the handshake and the handshake is not valid anymore @@ -610,7 +613,7 @@ mod test { fn rollback(&mut self) {} } - let mut replicator = Replicator::new(Client, tmp.path().to_path_buf(), 10000, None) + let mut replicator = Replicator::new_sqlite(Client, 
tmp.path().to_path_buf(), 10000, None) .await .unwrap(); replicator.state = ReplicatorState::NeedSnapshot; @@ -653,7 +656,7 @@ mod test { fn rollback(&mut self) {} } - let mut replicator = Replicator::new(Client, tmp.path().to_path_buf(), 10000, None) + let mut replicator = Replicator::new_sqlite(Client, tmp.path().to_path_buf(), 10000, None) .await .unwrap(); // we assume that we already received the handshake and the handshake is not valid anymore @@ -696,7 +699,7 @@ mod test { fn rollback(&mut self) {} } - let mut replicator = Replicator::new(Client, tmp.path().to_path_buf(), 10000, None) + let mut replicator = Replicator::new_sqlite(Client, tmp.path().to_path_buf(), 10000, None) .await .unwrap(); replicator.state = ReplicatorState::NeedHandshake; @@ -784,7 +787,7 @@ mod test { committed_frame_no: None, }; - let mut replicator = Replicator::new(client, tmp.path().to_path_buf(), 10000, None) + let mut replicator = Replicator::new_sqlite(client, tmp.path().to_path_buf(), 10000, None) .await .unwrap(); @@ -795,7 +798,7 @@ mod test { replicator.try_replicate_step().await.unwrap_err(), Error::Client(_) )); - assert!(!replicator.injector.lock().is_txn()); + assert!(!replicator.injector.inner.lock().is_txn()); assert!(replicator.client_mut().committed_frame_no.is_none()); assert_eq!(replicator.state, ReplicatorState::NeedHandshake); @@ -805,7 +808,7 @@ mod test { replicator.client_mut().should_error = false; replicator.try_replicate_step().await.unwrap(); - assert!(!replicator.injector.lock().is_txn()); + assert!(!replicator.injector.inner.lock().is_txn()); assert_eq!(replicator.state, ReplicatorState::Exit); assert_eq!(replicator.client_mut().committed_frame_no, Some(6)); } diff --git a/libsql/src/replication/mod.rs b/libsql/src/replication/mod.rs index 69cc0b5db2..2f4e9b49c0 100644 --- a/libsql/src/replication/mod.rs +++ b/libsql/src/replication/mod.rs @@ -6,6 +6,7 @@ use std::sync::Arc; use std::time::Duration; pub use libsql_replication::frame::{Frame, FrameNo}; 
+use libsql_replication::injector::SqliteInjector; use libsql_replication::replicator::{Either, Replicator}; pub use libsql_replication::snapshot::SnapshotFile; @@ -129,7 +130,7 @@ impl Writer { #[derive(Clone)] pub(crate) struct EmbeddedReplicator { - replicator: Arc>>>, + replicator: Arc, SqliteInjector>>>, bg_abort: Option>, last_frames_synced: Arc, } @@ -149,7 +150,7 @@ impl EmbeddedReplicator { perodic_sync: Option, ) -> Result { let replicator = Arc::new(Mutex::new( - Replicator::new( + Replicator::new_sqlite( Either::Left(client), db_path, auto_checkpoint, @@ -193,7 +194,7 @@ impl EmbeddedReplicator { encryption_config: Option, ) -> Result { let replicator = Arc::new(Mutex::new( - Replicator::new( + Replicator::new_sqlite( Either::Right(client), db_path, auto_checkpoint, From 4b5baacad57965b639dcab274a4a97f8b0ed3abd Mon Sep 17 00:00:00 2001 From: ad hoc Date: Thu, 8 Aug 2024 11:57:15 +0200 Subject: [PATCH 41/50] introduce libsql injector --- Cargo.lock | 2 + Cargo.toml | 1 + libsql-replication/Cargo.toml | 1 + .../proto/replication_log.proto | 6 +++ libsql-replication/src/frame.rs | 3 +- libsql-replication/src/generated/wal_log.rs | 42 +++++++++++++++ libsql-replication/src/injector/error.rs | 5 +- .../src/injector/libsql_injector.rs | 44 +++++++++++++++ libsql-replication/src/injector/mod.rs | 2 + .../src/injector/sqlite_injector/mod.rs | 12 ++--- libsql-replication/src/rpc.rs | 5 +- libsql-server/Cargo.toml | 2 +- .../src/replication/replicator_client.rs | 6 ++- libsql-server/src/rpc/replication_log.rs | 5 ++ libsql-wal/Cargo.toml | 3 +- libsql-wal/src/replication/injector.rs | 23 ++++---- libsql-wal/src/segment/current.rs | 4 +- libsql-wal/src/shared_wal.rs | 8 +-- libsql-wal/src/transaction.rs | 53 +++++++++++++++++-- libsql/src/replication/remote_client.rs | 3 +- 20 files changed, 196 insertions(+), 34 deletions(-) create mode 100644 libsql-replication/src/injector/libsql_injector.rs diff --git a/Cargo.lock b/Cargo.lock index 17cfc0e090..7e19e03e9a 
100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -3702,6 +3702,7 @@ name = "libsql-wal" version = "0.1.0" dependencies = [ "arc-swap", + "async-lock 3.4.0", "async-stream", "aws-config 1.5.4", "aws-credential-types 1.2.0", @@ -3779,6 +3780,7 @@ dependencies = [ "cbc", "libsql-rusqlite", "libsql-sys", + "libsql-wal", "parking_lot", "prost", "prost-build", diff --git a/Cargo.toml b/Cargo.toml index 9381fb83f3..685f14964f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -45,6 +45,7 @@ rusqlite = { package = "libsql-rusqlite", path = "vendored/rusqlite", version = ] } hyper = { version = "0.14" } tower = { version = "0.4.13" } +zerocopy = { version = "0.7.32", features = ["derive", "alloc"] } # Config for 'cargo dist' [workspace.metadata.dist] diff --git a/libsql-replication/Cargo.toml b/libsql-replication/Cargo.toml index d2a9431cba..2a03d362bf 100644 --- a/libsql-replication/Cargo.toml +++ b/libsql-replication/Cargo.toml @@ -12,6 +12,7 @@ license = "MIT" tonic = { version = "0.11", features = ["tls"] } prost = "0.12" libsql-sys = { version = "0.7", path = "../libsql-sys", default-features = false, features = ["wal", "rusqlite", "api"] } +libsql-wal = { path = "../libsql-wal/" } rusqlite = { workspace = true } parking_lot = "0.12.1" bytes = { version = "1.5.0", features = ["serde"] } diff --git a/libsql-replication/proto/replication_log.proto b/libsql-replication/proto/replication_log.proto index 6208874609..b3be419319 100644 --- a/libsql-replication/proto/replication_log.proto +++ b/libsql-replication/proto/replication_log.proto @@ -9,6 +9,12 @@ message LogOffset { message HelloRequest { optional uint64 handshake_version = 1; + enum WalFlavor { + Sqlite = 0; + Libsql = 1; + } + // the type of wal that the client is expecting + optional WalFlavor wal_flavor = 2; } message HelloResponse { diff --git a/libsql-replication/src/frame.rs b/libsql-replication/src/frame.rs index a6a2854e52..55b5b778b5 100644 --- a/libsql-replication/src/frame.rs +++ b/libsql-replication/src/frame.rs @@ 
-13,7 +13,6 @@ use crate::LIBSQL_PAGE_SIZE; pub type FrameNo = u64; /// The file header for the WAL log. All fields are represented in little-endian ordering. -/// See `encode` and `decode` for actual layout. // repr C for stable sizing #[repr(C)] #[derive(Debug, Clone, Copy, zerocopy::FromZeroes, zerocopy::FromBytes, zerocopy::AsBytes)] @@ -22,7 +21,7 @@ pub struct FrameHeader { pub frame_no: lu64, /// Rolling checksum of all the previous frames, including this one. pub checksum: lu64, - /// page number, if frame_type is FrameType::Page + /// page number pub page_no: lu32, /// Size of the database (in page) after committing the transaction. This is passed from sqlite, /// and serves as commit transaction boundary diff --git a/libsql-replication/src/generated/wal_log.rs b/libsql-replication/src/generated/wal_log.rs index 2d7330e732..441881c4a7 100644 --- a/libsql-replication/src/generated/wal_log.rs +++ b/libsql-replication/src/generated/wal_log.rs @@ -10,6 +10,48 @@ pub struct LogOffset { pub struct HelloRequest { #[prost(uint64, optional, tag = "1")] pub handshake_version: ::core::option::Option, + /// the type of wal that the client is expecting + #[prost(enumeration = "hello_request::WalFlavor", optional, tag = "2")] + pub wal_flavor: ::core::option::Option, +} +/// Nested message and enum types in `HelloRequest`. +pub mod hello_request { + #[derive( + Clone, + Copy, + Debug, + PartialEq, + Eq, + Hash, + PartialOrd, + Ord, + ::prost::Enumeration + )] + #[repr(i32)] + pub enum WalFlavor { + Sqlite = 0, + Libsql = 1, + } + impl WalFlavor { + /// String value of the enum field names used in the ProtoBuf definition. + /// + /// The values are not transformed in any way and thus are considered stable + /// (if the ProtoBuf definition does not change) and safe for programmatic use. 
+ pub fn as_str_name(&self) -> &'static str { + match self { + WalFlavor::Sqlite => "Sqlite", + WalFlavor::Libsql => "Libsql", + } + } + /// Creates an enum from field names used in the ProtoBuf definition. + pub fn from_str_name(value: &str) -> ::core::option::Option { + match value { + "Sqlite" => Some(Self::Sqlite), + "Libsql" => Some(Self::Libsql), + _ => None, + } + } + } } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] diff --git a/libsql-replication/src/injector/error.rs b/libsql-replication/src/injector/error.rs index b1cebfe28b..225960c4d1 100644 --- a/libsql-replication/src/injector/error.rs +++ b/libsql-replication/src/injector/error.rs @@ -1,4 +1,5 @@ pub type Result = std::result::Result; +pub type BoxError = Box; #[derive(Debug, thiserror::Error)] pub enum Error { @@ -6,6 +7,6 @@ pub enum Error { Io(#[from] std::io::Error), #[error("SQLite error: {0}")] Sqlite(#[from] rusqlite::Error), - #[error("A fatal error occured injecting frames")] - FatalInjectError, + #[error("A fatal error occured injecting frames: {0}")] + FatalInjectError(BoxError), } diff --git a/libsql-replication/src/injector/libsql_injector.rs b/libsql-replication/src/injector/libsql_injector.rs new file mode 100644 index 0000000000..946d35e547 --- /dev/null +++ b/libsql-replication/src/injector/libsql_injector.rs @@ -0,0 +1,44 @@ +use std::mem::size_of; + +use libsql_wal::io::StdIO; +use libsql_wal::replication::injector::Injector; +use libsql_wal::segment::Frame as WalFrame; +use zerocopy::{AsBytes, FromZeroes}; + +use crate::frame::{Frame, FrameNo}; + +use super::error::{Error, Result}; + +pub struct LibsqlInjector { + injector: Injector, +} + +impl super::Injector for LibsqlInjector { + async fn inject_frame(&mut self, frame: Frame) -> Result> { + // this is a bit annoying be we want to read the frame, and it has to be aligned, so we + // must copy it... + // FIXME: optimize this. 
+ let mut wal_frame = WalFrame::new_box_zeroed(); + if frame.bytes().len() != size_of::() { + todo!("invalid frame"); + } + wal_frame.as_bytes_mut().copy_from_slice(&frame.bytes()[..]); + Ok(self + .injector + .insert_frame(wal_frame) + .await + .map_err(|e| Error::FatalInjectError(e.into()))?) + } + + async fn rollback(&mut self) { + self.injector.rollback(); + } + + async fn flush(&mut self) -> Result> { + self.injector + .flush(None) + .await + .map_err(|e| Error::FatalInjectError(e.into()))?; + Ok(None) + } +} diff --git a/libsql-replication/src/injector/mod.rs b/libsql-replication/src/injector/mod.rs index 1d69ae0aab..20a81cfa01 100644 --- a/libsql-replication/src/injector/mod.rs +++ b/libsql-replication/src/injector/mod.rs @@ -1,6 +1,7 @@ use std::future::Future; pub use sqlite_injector::SqliteInjector; +pub use libsql_injector::LibsqlInjector; use crate::frame::{Frame, FrameNo}; @@ -9,6 +10,7 @@ pub use error::Error; mod error; mod sqlite_injector; +mod libsql_injector; pub trait Injector { /// Inject a singular frame. 
diff --git a/libsql-replication/src/injector/sqlite_injector/mod.rs b/libsql-replication/src/injector/sqlite_injector/mod.rs index dea78ce4b5..545fbe810d 100644 --- a/libsql-replication/src/injector/sqlite_injector/mod.rs +++ b/libsql-replication/src/injector/sqlite_injector/mod.rs @@ -192,8 +192,8 @@ impl SqliteInjectorInner { match stmt.execute(()).and_then(|_| connection.cache_flush()) { Ok(_) => panic!("replication hook was not called"), Err(e) => { - if let Some(e) = e.sqlite_error() { - if e.extended_code == LIBSQL_INJECT_OK { + if let Some(err) = e.sqlite_error() { + if err.extended_code == LIBSQL_INJECT_OK { // refresh schema connection.pragma_update(None, "writable_schema", "reset")?; let mut rollback = connection.prepare_cached("ROLLBACK")?; @@ -203,16 +203,16 @@ impl SqliteInjectorInner { let commit_frame_no = self.biggest_uncommitted_seen; self.biggest_uncommitted_seen = 0; return Ok(Some(commit_frame_no)); - } else if e.extended_code == LIBSQL_INJECT_OK_TXN { + } else if err.extended_code == LIBSQL_INJECT_OK_TXN { self.is_txn = true; assert!(self.buffer.lock().is_empty()); return Ok(None); - } else if e.extended_code == LIBSQL_INJECT_FATAL { - return Err(Error::FatalInjectError); + } else if err.extended_code == LIBSQL_INJECT_FATAL { + return Err(Error::FatalInjectError(e.into())); } } - Err(Error::FatalInjectError) + Err(Error::FatalInjectError(e.into())) } } } diff --git a/libsql-replication/src/rpc.rs b/libsql-replication/src/rpc.rs index ebc92cf10c..a9b172db20 100644 --- a/libsql-replication/src/rpc.rs +++ b/libsql-replication/src/rpc.rs @@ -25,6 +25,8 @@ pub mod replication { #![allow(clippy::all)] use uuid::Uuid; + + use self::hello_request::WalFlavor; include!("generated/wal_log.rs"); pub const NO_HELLO_ERROR_MSG: &str = "NO_HELLO"; @@ -46,9 +48,10 @@ pub mod replication { } impl HelloRequest { - pub fn new() -> Self { + pub fn new(wal_flavor: WalFlavor) -> Self { Self { handshake_version: Some(1), + wal_flavor: Some(wal_flavor.into()) } } } 
diff --git a/libsql-server/Cargo.toml b/libsql-server/Cargo.toml index 6763c02dfb..934a400786 100644 --- a/libsql-server/Cargo.toml +++ b/libsql-server/Cargo.toml @@ -83,7 +83,7 @@ url = { version = "2.3", features = ["serde"] } uuid = { version = "1.3", features = ["v4", "serde", "v7"] } aes = { version = "0.8.3", optional = true } cbc = { version = "0.1.2", optional = true } -zerocopy = { version = "0.7.28", features = ["derive", "alloc"] } +zerocopy = { workspace = true } hashbrown = { version = "0.14.3", features = ["serde"] } hdrhistogram = "7.5.4" crossbeam = "0.8.4" diff --git a/libsql-server/src/replication/replicator_client.rs b/libsql-server/src/replication/replicator_client.rs index 4d12ff7f83..d68c259dc9 100644 --- a/libsql-server/src/replication/replicator_client.rs +++ b/libsql-server/src/replication/replicator_client.rs @@ -7,6 +7,7 @@ use futures::TryStreamExt; use libsql_replication::frame::Frame; use libsql_replication::meta::WalIndexMeta; use libsql_replication::replicator::{map_frame_err, Error, ReplicatorClient}; +use libsql_replication::rpc::replication::hello_request::WalFlavor; use libsql_replication::rpc::replication::replication_log_client::ReplicationLogClient; use libsql_replication::rpc::replication::{ verify_session_token, HelloRequest, LogOffset, NAMESPACE_METADATA_KEY, SESSION_TOKEN_KEY, @@ -35,6 +36,7 @@ pub struct Client { // the primary current replication index, as reported by the last handshake pub primary_replication_index: Option, store: NamespaceStore, + wal_flavor: WalFlavor, } impl Client { @@ -44,6 +46,7 @@ impl Client { path: &Path, meta_store_handle: MetaStoreHandle, store: NamespaceStore, + wal_flavor: WalFlavor, ) -> crate::Result { let (current_frame_no_notifier, _) = watch::channel(None); let meta = WalIndexMeta::open(path).await?; @@ -57,6 +60,7 @@ impl Client { meta_store_handle, primary_replication_index: None, store, + wal_flavor, }) } @@ -96,7 +100,7 @@ impl ReplicatorClient for Client { 
#[tracing::instrument(skip(self))] async fn handshake(&mut self) -> Result<(), Error> { tracing::debug!("Attempting to perform handshake with primary."); - let req = self.make_request(HelloRequest::new()); + let req = self.make_request(HelloRequest::new(self.wal_flavor)); let resp = self.client.hello(req).await?; let hello = resp.into_inner(); verify_session_token(&hello.session_token).map_err(Error::Client)?; diff --git a/libsql-server/src/rpc/replication_log.rs b/libsql-server/src/rpc/replication_log.rs index c0b216739e..628cb4a01d 100644 --- a/libsql-server/src/rpc/replication_log.rs +++ b/libsql-server/src/rpc/replication_log.rs @@ -7,6 +7,7 @@ use bytes::Bytes; use chrono::{DateTime, Utc}; use futures::stream::BoxStream; use futures_core::Future; +use libsql_replication::rpc::replication::hello_request::WalFlavor; pub use libsql_replication::rpc::replication as rpc; use libsql_replication::rpc::replication::replication_log_server::ReplicationLog; use libsql_replication::rpc::replication::{ @@ -355,6 +356,10 @@ impl ReplicationLog for ReplicationLogService { } } + if let WalFlavor::Libsql = req.get_ref().wal_flavor() { + return Err(Status::invalid_argument("libsql wal not supported")) + } + let (logger, config, version, _, _) = self.logger_from_namespace(namespace, &req, false).await?; diff --git a/libsql-wal/Cargo.toml b/libsql-wal/Cargo.toml index 9624596c28..f24f2e4c59 100644 --- a/libsql-wal/Cargo.toml +++ b/libsql-wal/Cargo.toml @@ -9,6 +9,7 @@ publish = false [dependencies] arc-swap = "1.7.1" async-stream = "0.3.5" +async-lock = "3.4.0" bitflags = "2.5.0" bytes = "1.6.0" chrono = "0.4.38" @@ -29,7 +30,7 @@ tokio-stream = "0.1.15" tracing = "0.1.40" uuid = { version = "1.8.0", features = ["v4"] } walkdir = "2.5.0" -zerocopy = { version = "0.7.32", features = ["derive", "alloc"] } +zerocopy = { workspace = true } aws-config = { version = "1", optional = true, features = ["behavior-version-latest"] } aws-sdk-s3 = { version = "1", optional = true } diff --git 
a/libsql-wal/src/replication/injector.rs b/libsql-wal/src/replication/injector.rs index 66710bbb22..c3642e196e 100644 --- a/libsql-wal/src/replication/injector.rs +++ b/libsql-wal/src/replication/injector.rs @@ -6,23 +6,23 @@ use crate::error::Result; use crate::io::Io; use crate::segment::Frame; use crate::shared_wal::SharedWal; -use crate::transaction::TxGuard; +use crate::transaction::TxGuardOwned; /// The injector takes frames and injects them in the wal. -pub struct Injector<'a, IO: Io> { +pub struct Injector { // The wal to which we are injecting wal: Arc>, buffer: Vec>, /// capacity of the frame buffer capacity: usize, - tx: TxGuard<'a, IO::File>, + tx: TxGuardOwned, max_tx_frame_no: u64, } -impl<'a, IO: Io> Injector<'a, IO> { +impl Injector { pub fn new( wal: Arc>, - tx: TxGuard<'a, IO::File>, + tx: TxGuardOwned, buffer_capacity: usize, ) -> Result { Ok(Self { @@ -34,7 +34,7 @@ impl<'a, IO: Io> Injector<'a, IO> { }) } - pub async fn insert_frame(&mut self, frame: Box) -> Result<()> { + pub async fn insert_frame(&mut self, frame: Box) -> Result> { let size_after = frame.size_after(); self.max_tx_frame_no = self.max_tx_frame_no.max(frame.header().frame_no()); self.buffer.push(frame); @@ -43,10 +43,10 @@ impl<'a, IO: Io> Injector<'a, IO> { self.flush(size_after).await?; } - Ok(()) + Ok(size_after.map(|_| self.max_tx_frame_no)) } - async fn flush(&mut self, size_after: Option) -> Result<()> { + pub async fn flush(&mut self, size_after: Option) -> Result<()> { let buffer = std::mem::take(&mut self.buffer); let current = self.wal.current.load(); let commit_data = size_after.map(|size| (size, self.max_tx_frame_no)); @@ -60,6 +60,11 @@ impl<'a, IO: Io> Injector<'a, IO> { Ok(()) } + + pub fn rollback(&mut self) { + self.buffer.clear(); + self.tx.reset(0); + } } #[cfg(test)] @@ -89,7 +94,7 @@ mod test { let mut tx = crate::transaction::Transaction::Read(replica_shared.begin_read(42)); replica_shared.upgrade(&mut tx).unwrap(); - let guard = 
tx.as_write_mut().unwrap().lock(); + let guard = tx.into_write().unwrap_or_else(|_| panic!()).into_lock_owned(); let mut injector = Injector::new(replica_shared.clone(), guard, 10).unwrap(); primary_conn.execute("create table test (x)", ()).unwrap(); diff --git a/libsql-wal/src/segment/current.rs b/libsql-wal/src/segment/current.rs index d8d720a145..bda6d5742a 100644 --- a/libsql-wal/src/segment/current.rs +++ b/libsql-wal/src/segment/current.rs @@ -22,7 +22,7 @@ use crate::io::file::FileExt; use crate::io::Inspect; use crate::segment::{checked_frame_offset, SegmentFlags}; use crate::segment::{frame_offset, page_offset, sealed::SealedSegment}; -use crate::transaction::{Transaction, TxGuard}; +use crate::transaction::{Transaction, TxGuard, TxGuardOwned}; use crate::{LIBSQL_MAGIC, LIBSQL_PAGE_SIZE, LIBSQL_WAL_VERSION}; use super::list::SegmentList; @@ -125,7 +125,7 @@ impl CurrentSegment { frames: Vec>, // (size_after, last_frame_no) commit_data: Option<(u32, u64)>, - tx: &mut TxGuard<'_, F>, + tx: &mut TxGuardOwned, ) -> Result>> where F: FileExt, diff --git a/libsql-wal/src/shared_wal.rs b/libsql-wal/src/shared_wal.rs index 09a2747c5a..461ad13e03 100644 --- a/libsql-wal/src/shared_wal.rs +++ b/libsql-wal/src/shared_wal.rs @@ -20,7 +20,7 @@ use libsql_sys::name::NamespaceName; #[derive(Default)] pub struct WalLock { - pub(crate) tx_id: Arc>>, + pub(crate) tx_id: Arc>>, /// When a writer is popped from the write queue, its write transaction may not be reading from the most recent /// snapshot. In this case, we return `SQLITE_BUSY_SNAPHSOT` to the caller. 
If no reads were performed /// with that transaction before upgrading, then the caller will call us back immediately after re-acquiring @@ -108,7 +108,7 @@ impl SharedWal { Some(id) if id == read_tx.conn_id => { tracing::trace!("taking reserved slot"); reserved.take(); - let lock = self.wal_lock.tx_id.lock(); + let lock = self.wal_lock.tx_id.lock_blocking(); let write_tx = self.acquire_write(read_tx, lock, reserved)?; *tx = Transaction::Write(write_tx); return Ok(()); @@ -117,7 +117,7 @@ impl SharedWal { } } - let lock = self.wal_lock.tx_id.lock(); + let lock = self.wal_lock.tx_id.lock_blocking(); match *lock { None if self.wal_lock.waiters.is_empty() => { let write_tx = @@ -144,7 +144,7 @@ impl SharedWal { fn acquire_write( &self, read_tx: &ReadTransaction, - mut tx_id_lock: MutexGuard>, + mut tx_id_lock: async_lock::MutexGuard>, mut reserved: MutexGuard>, ) -> Result> { // we read two fields in the header. There is no risk that a transaction commit in diff --git a/libsql-wal/src/transaction.rs b/libsql-wal/src/transaction.rs index 723cffeae1..f2cdd5be70 100644 --- a/libsql-wal/src/transaction.rs +++ b/libsql-wal/src/transaction.rs @@ -4,7 +4,6 @@ use std::sync::Arc; use std::time::Instant; use libsql_sys::name::NamespaceName; -use parking_lot::{ArcMutexGuard, RawMutex}; use tokio::sync::mpsc; use crate::checkpointer::CheckpointMessage; @@ -31,6 +30,14 @@ impl Transaction { } } + pub fn into_write(self) -> Result, Self> { + if let Self::Write(v) = self { + Ok(v) + } else { + Err(self) + } + } + pub fn max_frame_no(&self) -> u64 { match self { Transaction::Write(w) => w.next_frame_no - 1, @@ -147,8 +154,27 @@ pub struct WriteTransaction { pub recompute_checksum: Option, } +pub struct TxGuardOwned { + _lock: async_lock::MutexGuardArc>, + inner: WriteTransaction, +} + +impl Deref for TxGuardOwned { + type Target = WriteTransaction; + + fn deref(&self) -> &Self::Target { + &self.inner + } +} + +impl DerefMut for TxGuardOwned { + fn deref_mut(&mut self) -> &mut 
Self::Target { + &mut self.inner + } +} + pub struct TxGuard<'a, F> { - _lock: ArcMutexGuard>, + _lock: async_lock::MutexGuardArc>, inner: &'a mut WriteTransaction, } @@ -189,7 +215,7 @@ impl WriteTransaction { todo!("txn has already been commited"); } - let g = self.wal_lock.tx_id.lock_arc(); + let g = self.wal_lock.tx_id.lock_arc_blocking(); match *g { // we still hold the lock, we can proceed Some(id) if self.id == id => TxGuard { @@ -202,6 +228,25 @@ impl WriteTransaction { } } + pub fn into_lock_owned(self) -> TxGuardOwned { + if self.is_commited { + tracing::error!("transaction already commited"); + todo!("txn has already been commited"); + } + + let g = self.wal_lock.tx_id.lock_arc_blocking(); + match *g { + // we still hold the lock, we can proceed + Some(id) if self.id == id => TxGuardOwned { + _lock: g, + inner: self, + }, + // Somebody took the lock from us + Some(_) => todo!("lock stolen"), + None => todo!("not a transaction"), + } + } + pub fn reset(&mut self, savepoint_id: usize) { if savepoint_id >= self.savepoints.len() { unreachable!("savepoint doesn't exist"); @@ -231,7 +276,7 @@ impl WriteTransaction { let Self { wal_lock, read_tx, .. 
} = self; - let mut lock = wal_lock.tx_id.lock(); + let mut lock = wal_lock.tx_id.lock_blocking(); match *lock { Some(lock_id) if lock_id == read_tx.id => { lock.take(); diff --git a/libsql/src/replication/remote_client.rs b/libsql/src/replication/remote_client.rs index d0052f50d9..79cffb1c38 100644 --- a/libsql/src/replication/remote_client.rs +++ b/libsql/src/replication/remote_client.rs @@ -8,6 +8,7 @@ use futures::StreamExt as _; use libsql_replication::frame::{Frame, FrameHeader, FrameNo}; use libsql_replication::meta::WalIndexMeta; use libsql_replication::replicator::{map_frame_err, Error, ReplicatorClient}; +use libsql_replication::rpc::replication::hello_request::WalFlavor; use libsql_replication::rpc::replication::{ verify_session_token, Frames, HelloRequest, HelloResponse, LogOffset, SESSION_TOKEN_KEY, }; @@ -116,7 +117,7 @@ impl RemoteClient { self.dirty = false; } let prefetch = self.session_token.is_some(); - let hello_req = self.make_request(HelloRequest::new()); + let hello_req = self.make_request(HelloRequest::new(WalFlavor::Sqlite)); let log_offset_req = self.make_request(LogOffset { next_offset: self.next_offset(), }); From 566664e9a0acc7bbd9544c49f5f573eaec6a0a7e Mon Sep 17 00:00:00 2001 From: ad hoc Date: Thu, 8 Aug 2024 15:14:01 +0200 Subject: [PATCH 42/50] fmt --- bottomless/src/replicator.rs | 3 +- libsql-replication/src/injector/error.rs | 2 +- libsql-replication/src/injector/mod.rs | 6 +-- .../src/injector/sqlite_injector/mod.rs | 38 +++++++++---------- libsql-replication/src/replicator.rs | 3 +- libsql-replication/src/rpc.rs | 2 +- libsql-server/src/rpc/replication_log.rs | 4 +- libsql-wal/src/replication/injector.rs | 5 ++- 8 files changed, 33 insertions(+), 30 deletions(-) diff --git a/bottomless/src/replicator.rs b/bottomless/src/replicator.rs index 26e190df66..4e92824778 100644 --- a/bottomless/src/replicator.rs +++ b/bottomless/src/replicator.rs @@ -1455,7 +1455,8 @@ impl Replicator { 4096, libsql_sys::connection::NO_AUTOCHECKPOINT, 
encryption_config, - ).await?; + ) + .await?; let prefix = format!("{}-{}/", self.db_name, generation); let mut page_buf = { let mut v = Vec::with_capacity(page_size); diff --git a/libsql-replication/src/injector/error.rs b/libsql-replication/src/injector/error.rs index 225960c4d1..ac8f1be711 100644 --- a/libsql-replication/src/injector/error.rs +++ b/libsql-replication/src/injector/error.rs @@ -1,4 +1,4 @@ -pub type Result = std::result::Result; +pub type Result = std::result::Result; pub type BoxError = Box; #[derive(Debug, thiserror::Error)] diff --git a/libsql-replication/src/injector/mod.rs b/libsql-replication/src/injector/mod.rs index 20a81cfa01..39df68c777 100644 --- a/libsql-replication/src/injector/mod.rs +++ b/libsql-replication/src/injector/mod.rs @@ -1,16 +1,16 @@ use std::future::Future; -pub use sqlite_injector::SqliteInjector; pub use libsql_injector::LibsqlInjector; +pub use sqlite_injector::SqliteInjector; use crate::frame::{Frame, FrameNo}; -use error::Result; pub use error::Error; +use error::Result; mod error; -mod sqlite_injector; mod libsql_injector; +mod sqlite_injector; pub trait Injector { /// Inject a singular frame. 
diff --git a/libsql-replication/src/injector/sqlite_injector/mod.rs b/libsql-replication/src/injector/sqlite_injector/mod.rs index 545fbe810d..2f4193e469 100644 --- a/libsql-replication/src/injector/sqlite_injector/mod.rs +++ b/libsql-replication/src/injector/sqlite_injector/mod.rs @@ -25,28 +25,23 @@ pub struct SqliteInjector { } impl Injector for SqliteInjector { - async fn inject_frame( - &mut self, - frame: Frame, - ) -> Result> { + async fn inject_frame(&mut self, frame: Frame) -> Result> { let inner = self.inner.clone(); - spawn_blocking(move || { - inner.lock().inject_frame(frame) - }).await.unwrap() + spawn_blocking(move || inner.lock().inject_frame(frame)) + .await + .unwrap() } async fn rollback(&mut self) { let inner = self.inner.clone(); - spawn_blocking(move || { - inner.lock().rollback() - }).await.unwrap(); + spawn_blocking(move || inner.lock().rollback()) + .await + .unwrap(); } async fn flush(&mut self) -> Result> { let inner = self.inner.clone(); - spawn_blocking(move || { - inner.lock().flush() - }).await.unwrap() + spawn_blocking(move || inner.lock().flush()).await.unwrap() } } @@ -56,13 +51,15 @@ impl SqliteInjector { capacity: usize, auto_checkpoint: u32, encryption_config: Option, - ) ->super::Result { + ) -> super::Result { let inner = spawn_blocking(move || { SqliteInjectorInner::new(path, capacity, auto_checkpoint, encryption_config) - }).await.unwrap()?; + }) + .await + .unwrap()?; Ok(Self { - inner: Arc::new(Mutex::new(inner)) + inner: Arc::new(Mutex::new(inner)), }) } } @@ -278,7 +275,8 @@ mod test { fn test_simple_inject_frames() { let temp = tempfile::tempdir().unwrap(); - let mut injector = SqliteInjectorInner::new(temp.path().join("data"), 10, 10000, None).unwrap(); + let mut injector = + SqliteInjectorInner::new(temp.path().join("data"), 10, 10000, None).unwrap(); let log = wal_log(); for frame in log { injector.inject_frame(frame).unwrap(); @@ -298,7 +296,8 @@ mod test { let temp = tempfile::tempdir().unwrap(); // inject one frame 
at a time - let mut injector = SqliteInjectorInner::new(temp.path().join("data"), 1, 10000, None).unwrap(); + let mut injector = + SqliteInjectorInner::new(temp.path().join("data"), 1, 10000, None).unwrap(); let log = wal_log(); for frame in log { injector.inject_frame(frame).unwrap(); @@ -318,7 +317,8 @@ mod test { let temp = tempfile::tempdir().unwrap(); // inject one frame at a time - let mut injector = SqliteInjectorInner::new(temp.path().join("data"), 10, 1000, None).unwrap(); + let mut injector = + SqliteInjectorInner::new(temp.path().join("data"), 10, 1000, None).unwrap(); let mut frames = wal_log(); assert!(injector diff --git a/libsql-replication/src/replicator.rs b/libsql-replication/src/replicator.rs index 31c766faad..ee75822676 100644 --- a/libsql-replication/src/replicator.rs +++ b/libsql-replication/src/replicator.rs @@ -168,7 +168,7 @@ where auto_checkpoint, encryption_config, ) - .await?; + .await?; Ok(Self::new(client, injector)) } @@ -179,7 +179,6 @@ where C: ReplicatorClient, I: Injector, { - pub fn new(client: C, injector: I) -> Self { Self { client, diff --git a/libsql-replication/src/rpc.rs b/libsql-replication/src/rpc.rs index a9b172db20..8e1165af65 100644 --- a/libsql-replication/src/rpc.rs +++ b/libsql-replication/src/rpc.rs @@ -51,7 +51,7 @@ pub mod replication { pub fn new(wal_flavor: WalFlavor) -> Self { Self { handshake_version: Some(1), - wal_flavor: Some(wal_flavor.into()) + wal_flavor: Some(wal_flavor.into()), } } } diff --git a/libsql-server/src/rpc/replication_log.rs b/libsql-server/src/rpc/replication_log.rs index 628cb4a01d..bf1840a5c6 100644 --- a/libsql-server/src/rpc/replication_log.rs +++ b/libsql-server/src/rpc/replication_log.rs @@ -7,8 +7,8 @@ use bytes::Bytes; use chrono::{DateTime, Utc}; use futures::stream::BoxStream; use futures_core::Future; -use libsql_replication::rpc::replication::hello_request::WalFlavor; pub use libsql_replication::rpc::replication as rpc; +use 
libsql_replication::rpc::replication::hello_request::WalFlavor; use libsql_replication::rpc::replication::replication_log_server::ReplicationLog; use libsql_replication::rpc::replication::{ Frame, Frames, HelloRequest, HelloResponse, LogOffset, NAMESPACE_DOESNT_EXIST, @@ -357,7 +357,7 @@ impl ReplicationLog for ReplicationLogService { } if let WalFlavor::Libsql = req.get_ref().wal_flavor() { - return Err(Status::invalid_argument("libsql wal not supported")) + return Err(Status::invalid_argument("libsql wal not supported")); } let (logger, config, version, _, _) = diff --git a/libsql-wal/src/replication/injector.rs b/libsql-wal/src/replication/injector.rs index c3642e196e..a922330102 100644 --- a/libsql-wal/src/replication/injector.rs +++ b/libsql-wal/src/replication/injector.rs @@ -94,7 +94,10 @@ mod test { let mut tx = crate::transaction::Transaction::Read(replica_shared.begin_read(42)); replica_shared.upgrade(&mut tx).unwrap(); - let guard = tx.into_write().unwrap_or_else(|_| panic!()).into_lock_owned(); + let guard = tx + .into_write() + .unwrap_or_else(|_| panic!()) + .into_lock_owned(); let mut injector = Injector::new(replica_shared.clone(), guard, 10).unwrap(); primary_conn.execute("create table test (x)", ()).unwrap(); From a2bdc805743f87f5c8cf2852c548578dbe496401 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Thu, 8 Aug 2024 16:07:42 +0200 Subject: [PATCH 43/50] pass RpcFrame to client methods necessary to pass different underlying frames --- bottomless/src/replicator.rs | 7 ++- .../src/injector/libsql_injector.rs | 9 ++-- libsql-replication/src/injector/mod.rs | 5 +- .../src/injector/sqlite_injector/mod.rs | 5 +- libsql-replication/src/replicator.rs | 49 +++++++++++++------ .../src/replication/replicator_client.rs | 15 +++--- libsql/src/replication/local_client.rs | 14 ++++-- libsql/src/replication/remote_client.rs | 17 ++++--- 8 files changed, 79 insertions(+), 42 deletions(-) diff --git a/bottomless/src/replicator.rs b/bottomless/src/replicator.rs index 
4e92824778..cd37a70165 100644 --- a/bottomless/src/replicator.rs +++ b/bottomless/src/replicator.rs @@ -18,6 +18,7 @@ use aws_sdk_s3::{Client, Config}; use bytes::{Buf, Bytes}; use chrono::{DateTime, NaiveDateTime, TimeZone, Utc}; use libsql_replication::injector::Injector as _; +use libsql_replication::rpc::replication::Frame as RpcFrame; use libsql_sys::{Cipher, EncryptionConfig}; use std::ops::Deref; use std::path::{Path, PathBuf}; @@ -1554,7 +1555,11 @@ impl Replicator { }, page_buf.as_slice(), ); - injector.inject_frame(frame_to_inject).await?; + let frame = RpcFrame { + data: frame_to_inject.bytes(), + timestamp: None, + }; + injector.inject_frame(frame).await?; applied_wal_frame = true; } } diff --git a/libsql-replication/src/injector/libsql_injector.rs b/libsql-replication/src/injector/libsql_injector.rs index 946d35e547..f867a29245 100644 --- a/libsql-replication/src/injector/libsql_injector.rs +++ b/libsql-replication/src/injector/libsql_injector.rs @@ -5,7 +5,8 @@ use libsql_wal::replication::injector::Injector; use libsql_wal::segment::Frame as WalFrame; use zerocopy::{AsBytes, FromZeroes}; -use crate::frame::{Frame, FrameNo}; +use crate::frame::FrameNo; +use crate::rpc::replication::Frame as RpcFrame; use super::error::{Error, Result}; @@ -14,15 +15,15 @@ pub struct LibsqlInjector { } impl super::Injector for LibsqlInjector { - async fn inject_frame(&mut self, frame: Frame) -> Result> { + async fn inject_frame(&mut self, frame: RpcFrame) -> Result> { // this is a bit annoying be we want to read the frame, and it has to be aligned, so we // must copy it... // FIXME: optimize this. 
let mut wal_frame = WalFrame::new_box_zeroed(); - if frame.bytes().len() != size_of::() { + if frame.data.len() != size_of::() { todo!("invalid frame"); } - wal_frame.as_bytes_mut().copy_from_slice(&frame.bytes()[..]); + wal_frame.as_bytes_mut().copy_from_slice(&frame.data[..]); Ok(self .injector .insert_frame(wal_frame) diff --git a/libsql-replication/src/injector/mod.rs b/libsql-replication/src/injector/mod.rs index 39df68c777..3712458d2f 100644 --- a/libsql-replication/src/injector/mod.rs +++ b/libsql-replication/src/injector/mod.rs @@ -1,9 +1,10 @@ use std::future::Future; +use super::rpc::replication::Frame as RpcFrame; pub use libsql_injector::LibsqlInjector; pub use sqlite_injector::SqliteInjector; -use crate::frame::{Frame, FrameNo}; +use crate::frame::FrameNo; pub use error::Error; use error::Result; @@ -16,7 +17,7 @@ pub trait Injector { /// Inject a singular frame. fn inject_frame( &mut self, - frame: Frame, + frame: RpcFrame, ) -> impl Future>> + Send; /// Discard any uncommintted frames. 
diff --git a/libsql-replication/src/injector/sqlite_injector/mod.rs b/libsql-replication/src/injector/sqlite_injector/mod.rs index 2f4193e469..f6ce2aa89f 100644 --- a/libsql-replication/src/injector/sqlite_injector/mod.rs +++ b/libsql-replication/src/injector/sqlite_injector/mod.rs @@ -7,6 +7,7 @@ use rusqlite::OpenFlags; use tokio::task::spawn_blocking; use crate::frame::{Frame, FrameNo}; +use crate::rpc::replication::Frame as RpcFrame; use self::injector_wal::{ InjectorWal, InjectorWalManager, LIBSQL_INJECT_FATAL, LIBSQL_INJECT_OK, LIBSQL_INJECT_OK_TXN, @@ -25,8 +26,10 @@ pub struct SqliteInjector { } impl Injector for SqliteInjector { - async fn inject_frame(&mut self, frame: Frame) -> Result> { + async fn inject_frame(&mut self, frame: RpcFrame) -> Result> { let inner = self.inner.clone(); + let frame = + Frame::try_from(&frame.data[..]).map_err(|e| Error::FatalInjectError(e.into()))?; spawn_blocking(move || inner.lock().inject_frame(frame)) .await .unwrap() diff --git a/libsql-replication/src/replicator.rs b/libsql-replication/src/replicator.rs index ee75822676..38cdbf6e7c 100644 --- a/libsql-replication/src/replicator.rs +++ b/libsql-replication/src/replicator.rs @@ -63,7 +63,7 @@ impl From for Error { #[async_trait::async_trait] pub trait ReplicatorClient { - type FrameStream: Stream> + Unpin + Send; + type FrameStream: Stream> + Unpin + Send; /// Perform handshake with remote async fn handshake(&mut self) -> Result<(), Error>; @@ -318,7 +318,7 @@ where } } - async fn inject_frame(&mut self, frame: Frame) -> Result<(), Error> { + async fn inject_frame(&mut self, frame: RpcFrame) -> Result<(), Error> { self.frames_synced += 1; match self.injector.inject_frame(frame).await? 
{ @@ -360,6 +360,7 @@ mod test { use async_stream::stream; use crate::frame::{FrameBorrowed, FrameMut}; + use crate::rpc::replication::Frame as RpcFrame; use super::*; @@ -370,7 +371,8 @@ mod test { #[async_trait::async_trait] impl ReplicatorClient for Client { - type FrameStream = Pin> + Send + 'static>>; + type FrameStream = + Pin> + Send + 'static>>; /// Perform handshake with remote async fn handshake(&mut self) -> Result<(), Error> { @@ -414,7 +416,8 @@ mod test { #[async_trait::async_trait] impl ReplicatorClient for Client { - type FrameStream = Pin> + Send + 'static>>; + type FrameStream = + Pin> + Send + 'static>>; /// Perform handshake with remote async fn handshake(&mut self) -> Result<(), Error> { @@ -456,7 +459,8 @@ mod test { #[async_trait::async_trait] impl ReplicatorClient for Client { - type FrameStream = Pin> + Send + 'static>>; + type FrameStream = + Pin> + Send + 'static>>; /// Perform handshake with remote async fn handshake(&mut self) -> Result<(), Error> { @@ -500,7 +504,8 @@ mod test { #[async_trait::async_trait] impl ReplicatorClient for Client { - type FrameStream = Pin> + Send + 'static>>; + type FrameStream = + Pin> + Send + 'static>>; /// Perform handshake with remote async fn handshake(&mut self) -> Result<(), Error> { @@ -544,7 +549,8 @@ mod test { #[async_trait::async_trait] impl ReplicatorClient for Client { - type FrameStream = Pin> + Send + 'static>>; + type FrameStream = + Pin> + Send + 'static>>; /// Perform handshake with remote async fn handshake(&mut self) -> Result<(), Error> { @@ -586,7 +592,8 @@ mod test { #[async_trait::async_trait] impl ReplicatorClient for Client { - type FrameStream = Pin> + Send + 'static>>; + type FrameStream = + Pin> + Send + 'static>>; /// Perform handshake with remote async fn handshake(&mut self) -> Result<(), Error> { @@ -627,7 +634,8 @@ mod test { #[async_trait::async_trait] impl ReplicatorClient for Client { - type FrameStream = Pin> + Send + 'static>>; + type FrameStream = + Pin> + Send + 
'static>>; /// Perform handshake with remote async fn handshake(&mut self) -> Result<(), Error> { @@ -672,7 +680,8 @@ mod test { #[async_trait::async_trait] impl ReplicatorClient for Client { - type FrameStream = Pin> + Send + 'static>>; + type FrameStream = + Pin> + Send + 'static>>; /// Perform handshake with remote async fn handshake(&mut self) -> Result<(), Error> { @@ -740,7 +749,8 @@ mod test { #[async_trait::async_trait] impl ReplicatorClient for Client { - type FrameStream = Pin> + Send + 'static>>; + type FrameStream = + Pin> + Send + 'static>>; /// Perform handshake with remote async fn handshake(&mut self) -> Result<(), Error> { @@ -752,15 +762,26 @@ mod test { let frames = self .frames .iter() + .map(|f| RpcFrame { + data: f.bytes(), + timestamp: None, + }) .take(2) - .cloned() .map(Ok) .chain(Some(Err(Error::Client("some client error".into())))) .collect::>(); Ok(Box::pin(tokio_stream::iter(frames))) } else { - let stream = tokio_stream::iter(self.frames.clone().into_iter().map(Ok)); - Ok(Box::pin(stream)) + let iter = self + .frames + .iter() + .map(|f| RpcFrame { + data: f.bytes(), + timestamp: None, + }) + .map(Ok) + .collect::>(); + Ok(Box::pin(tokio_stream::iter(iter))) } } /// Return a snapshot for the current replication index. 
Called after next_frame has returned a diff --git a/libsql-server/src/replication/replicator_client.rs b/libsql-server/src/replication/replicator_client.rs index d68c259dc9..89e465053b 100644 --- a/libsql-server/src/replication/replicator_client.rs +++ b/libsql-server/src/replication/replicator_client.rs @@ -4,16 +4,17 @@ use std::pin::Pin; use bytes::Bytes; use chrono::{DateTime, Utc}; use futures::TryStreamExt; -use libsql_replication::frame::Frame; use libsql_replication::meta::WalIndexMeta; -use libsql_replication::replicator::{map_frame_err, Error, ReplicatorClient}; +use libsql_replication::replicator::{Error, ReplicatorClient}; use libsql_replication::rpc::replication::hello_request::WalFlavor; use libsql_replication::rpc::replication::replication_log_client::ReplicationLogClient; use libsql_replication::rpc::replication::{ - verify_session_token, HelloRequest, LogOffset, NAMESPACE_METADATA_KEY, SESSION_TOKEN_KEY, + verify_session_token, Frame as RpcFrame, HelloRequest, LogOffset, NAMESPACE_METADATA_KEY, + SESSION_TOKEN_KEY, }; use tokio::sync::watch; -use tokio_stream::{Stream, StreamExt}; +use tokio_stream::Stream; + use tonic::metadata::{AsciiMetadataValue, BinaryMetadataValue}; use tonic::transport::Channel; use tonic::{Code, Request, Status}; @@ -95,7 +96,7 @@ impl Client { #[async_trait::async_trait] impl ReplicatorClient for Client { - type FrameStream = Pin> + Send + 'static>>; + type FrameStream = Pin> + Send + 'static>>; #[tracing::instrument(skip(self))] async fn handshake(&mut self) -> Result<(), Error> { @@ -169,7 +170,7 @@ impl ReplicatorClient for Client { None => REPLICATION_LATENCY_CACHE_MISS.increment(1), } }) - .map(map_frame_err); + .map_err(Into::into); Ok(Box::pin(stream)) } @@ -181,7 +182,7 @@ impl ReplicatorClient for Client { let req = self.make_request(offset); match self.client.snapshot(req).await { Ok(resp) => { - let stream = resp.into_inner().map(map_frame_err); + let stream = resp.into_inner().map_err(Into::into); 
Ok(Box::pin(stream)) } Err(e) if e.code() == Code::Unavailable => Err(Error::SnapshotPending), diff --git a/libsql/src/replication/local_client.rs b/libsql/src/replication/local_client.rs index 2d7b940c92..d3c713f530 100644 --- a/libsql/src/replication/local_client.rs +++ b/libsql/src/replication/local_client.rs @@ -3,6 +3,7 @@ use std::pin::Pin; use futures::{StreamExt, TryStreamExt}; use libsql_replication::{ + rpc::replication::Frame as RpcFrame, frame::{Frame, FrameNo}, meta::WalIndexMeta, replicator::{Error, ReplicatorClient}, @@ -35,7 +36,7 @@ impl LocalClient { #[async_trait::async_trait] impl ReplicatorClient for LocalClient { - type FrameStream = Pin> + Send + 'static>>; + type FrameStream = Pin> + Send + 'static>>; /// Perform handshake with remote async fn handshake(&mut self) -> Result<(), Error> { @@ -46,7 +47,7 @@ impl ReplicatorClient for LocalClient { async fn next_frames(&mut self) -> Result { match self.frames.take() { Some(Frames::Vec(f)) => { - let iter = f.into_iter().map(Ok); + let iter = f.into_iter().map(|f| RpcFrame { data: f.bytes(), timestamp: None }).map(Ok); Ok(Box::pin(tokio_stream::iter(iter))) } Some(f @ Frames::Snapshot(_)) => { @@ -70,7 +71,8 @@ impl ReplicatorClient for LocalClient { if s.as_mut().peek().await.is_none() { next.header_mut().size_after = size_after.into(); } - yield Frame::from(next); + let frame = Frame::from(next); + yield RpcFrame { data: frame.bytes(), timestamp: None }; } }; @@ -95,8 +97,9 @@ impl ReplicatorClient for LocalClient { #[cfg(test)] mod test { - use libsql_replication::snapshot::SnapshotFile; + use libsql_replication::{frame::FrameHeader, snapshot::SnapshotFile}; use tempfile::tempdir; + use zerocopy::FromBytes; use super::*; @@ -111,7 +114,8 @@ mod test { let mut s = client.snapshot().await.unwrap(); assert!(matches!(s.next().await, Some(Ok(_)))); let last = s.next().await.unwrap().unwrap(); - assert_eq!(last.header().size_after.get(), 2); + let header: FrameHeader = 
FrameHeader::read_from_prefix(&last.data[..]).unwrap(); + assert_eq!(header.size_after.get(), 2); assert!(s.next().await.is_none()); } } diff --git a/libsql/src/replication/remote_client.rs b/libsql/src/replication/remote_client.rs index 79cffb1c38..26e537d18a 100644 --- a/libsql/src/replication/remote_client.rs +++ b/libsql/src/replication/remote_client.rs @@ -4,13 +4,13 @@ use std::pin::Pin; use std::time::{Duration, Instant}; use bytes::Bytes; -use futures::StreamExt as _; -use libsql_replication::frame::{Frame, FrameHeader, FrameNo}; +use futures::{StreamExt as _, TryStreamExt}; +use libsql_replication::frame::{FrameHeader, FrameNo}; use libsql_replication::meta::WalIndexMeta; -use libsql_replication::replicator::{map_frame_err, Error, ReplicatorClient}; +use libsql_replication::replicator::{Error, ReplicatorClient}; use libsql_replication::rpc::replication::hello_request::WalFlavor; use libsql_replication::rpc::replication::{ - verify_session_token, Frames, HelloRequest, HelloResponse, LogOffset, SESSION_TOKEN_KEY, + Frame as RpcFrame, verify_session_token, Frames, HelloRequest, HelloResponse, LogOffset, SESSION_TOKEN_KEY, }; use tokio_stream::Stream; use tonic::metadata::AsciiMetadataValue; @@ -161,7 +161,7 @@ impl RemoteClient { let frames_iter = frames .into_iter() - .map(|f| Frame::try_from(&*f.data).map_err(|e| Error::Client(e.into()))); + .map(Ok); let stream = tokio_stream::iter(frames_iter); @@ -197,7 +197,7 @@ impl RemoteClient { .snapshot(req) .await? 
.into_inner() - .map(map_frame_err) + .map_err(|e| e.into()) .peekable(); { @@ -205,7 +205,8 @@ impl RemoteClient { // the first frame is the one with the highest frame_no in the snapshot if let Some(Ok(f)) = frames.peek().await { - self.last_received = Some(f.header().frame_no.get()); + let header: FrameHeader = FrameHeader::read_from_prefix(&f.data[..]).unwrap(); + self.last_received = Some(header.frame_no.get()); } } @@ -240,7 +241,7 @@ fn maybe_log( #[async_trait::async_trait] impl ReplicatorClient for RemoteClient { - type FrameStream = Pin> + Send + 'static>>; + type FrameStream = Pin> + Send + 'static>>; /// Perform handshake with remote async fn handshake(&mut self) -> Result<(), Error> { From e97026c82d4bfc13f2298d98c705a19cea6159be Mon Sep 17 00:00:00 2001 From: ad hoc Date: Thu, 8 Aug 2024 16:33:10 +0200 Subject: [PATCH 44/50] feature gate libsql injector --- libsql-replication/Cargo.toml | 3 ++- libsql-replication/src/injector/mod.rs | 2 ++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/libsql-replication/Cargo.toml b/libsql-replication/Cargo.toml index 2a03d362bf..068e23a652 100644 --- a/libsql-replication/Cargo.toml +++ b/libsql-replication/Cargo.toml @@ -12,7 +12,7 @@ license = "MIT" tonic = { version = "0.11", features = ["tls"] } prost = "0.12" libsql-sys = { version = "0.7", path = "../libsql-sys", default-features = false, features = ["wal", "rusqlite", "api"] } -libsql-wal = { path = "../libsql-wal/" } +libsql-wal = { path = "../libsql-wal/", optional = true } rusqlite = { workspace = true } parking_lot = "0.12.1" bytes = { version = "1.5.0", features = ["serde"] } @@ -38,3 +38,4 @@ tonic-build = "0.11" [features] encryption = ["libsql-sys/encryption"] +libsql_wal = ["dep:libsql-wal"] diff --git a/libsql-replication/src/injector/mod.rs b/libsql-replication/src/injector/mod.rs index 3712458d2f..b139f07cc9 100644 --- a/libsql-replication/src/injector/mod.rs +++ b/libsql-replication/src/injector/mod.rs @@ -1,6 +1,7 @@ use 
std::future::Future; use super::rpc::replication::Frame as RpcFrame; +#[cfg(feature = "libsql_wal")] pub use libsql_injector::LibsqlInjector; pub use sqlite_injector::SqliteInjector; @@ -10,6 +11,7 @@ pub use error::Error; use error::Result; mod error; +#[cfg(feature = "libsql_wal")] mod libsql_injector; mod sqlite_injector; From d29ca7fabed4440e0eb89eab2b10765b85a38e84 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Sat, 10 Aug 2024 11:16:24 +0200 Subject: [PATCH 45/50] fix conflicts --- .../proto/replication_log.proto | 10 +++++----- libsql-replication/src/generated/wal_log.rs | 20 +++++++++---------- libsql-replication/src/rpc.rs | 4 +--- .../src/namespace/configurator/replica.rs | 4 +++- .../src/replication/replicator_client.rs | 6 ++++-- libsql-server/src/rpc/replication_log.rs | 18 +++++++++++------ libsql/src/replication/remote_client.rs | 6 ++++-- 7 files changed, 39 insertions(+), 29 deletions(-) diff --git a/libsql-replication/proto/replication_log.proto b/libsql-replication/proto/replication_log.proto index b3be419319..b358232705 100644 --- a/libsql-replication/proto/replication_log.proto +++ b/libsql-replication/proto/replication_log.proto @@ -5,18 +5,18 @@ import "metadata.proto"; message LogOffset { uint64 next_offset = 1; -} - -message HelloRequest { - optional uint64 handshake_version = 1; enum WalFlavor { Sqlite = 0; Libsql = 1; } - // the type of wal that the client is expecting + // the type of wal frames that the client is expecting optional WalFlavor wal_flavor = 2; } +message HelloRequest { + optional uint64 handshake_version = 1; +} + message HelloResponse { /// Uuid of the current generation string generation_id = 1; diff --git a/libsql-replication/src/generated/wal_log.rs b/libsql-replication/src/generated/wal_log.rs index 441881c4a7..a34d5e59dd 100644 --- a/libsql-replication/src/generated/wal_log.rs +++ b/libsql-replication/src/generated/wal_log.rs @@ -4,18 +4,12 @@ pub struct LogOffset { #[prost(uint64, tag = "1")] pub next_offset: u64, -} 
-#[allow(clippy::derive_partial_eq_without_eq)] -#[derive(Clone, PartialEq, ::prost::Message)] -pub struct HelloRequest { - #[prost(uint64, optional, tag = "1")] - pub handshake_version: ::core::option::Option, - /// the type of wal that the client is expecting - #[prost(enumeration = "hello_request::WalFlavor", optional, tag = "2")] + /// the type of wal frames that the client is expecting + #[prost(enumeration = "log_offset::WalFlavor", optional, tag = "2")] pub wal_flavor: ::core::option::Option, } -/// Nested message and enum types in `HelloRequest`. -pub mod hello_request { +/// Nested message and enum types in `LogOffset`. +pub mod log_offset { #[derive( Clone, Copy, @@ -55,6 +49,12 @@ pub mod hello_request { } #[allow(clippy::derive_partial_eq_without_eq)] #[derive(Clone, PartialEq, ::prost::Message)] +pub struct HelloRequest { + #[prost(uint64, optional, tag = "1")] + pub handshake_version: ::core::option::Option, +} +#[allow(clippy::derive_partial_eq_without_eq)] +#[derive(Clone, PartialEq, ::prost::Message)] pub struct HelloResponse { /// / Uuid of the current generation #[prost(string, tag = "1")] diff --git a/libsql-replication/src/rpc.rs b/libsql-replication/src/rpc.rs index 8e1165af65..a538bc4c28 100644 --- a/libsql-replication/src/rpc.rs +++ b/libsql-replication/src/rpc.rs @@ -26,7 +26,6 @@ pub mod replication { use uuid::Uuid; - use self::hello_request::WalFlavor; include!("generated/wal_log.rs"); pub const NO_HELLO_ERROR_MSG: &str = "NO_HELLO"; @@ -48,10 +47,9 @@ pub mod replication { } impl HelloRequest { - pub fn new(wal_flavor: WalFlavor) -> Self { + pub fn new() -> Self { Self { handshake_version: Some(1), - wal_flavor: Some(wal_flavor.into()), } } } diff --git a/libsql-server/src/namespace/configurator/replica.rs b/libsql-server/src/namespace/configurator/replica.rs index 84ebadb897..7832d30ef8 100644 --- a/libsql-server/src/namespace/configurator/replica.rs +++ b/libsql-server/src/namespace/configurator/replica.rs @@ -3,6 +3,7 @@ use 
std::sync::Arc; use futures::Future; use hyper::Uri; +use libsql_replication::rpc::replication::log_offset::WalFlavor; use libsql_replication::rpc::replication::replication_log_client::ReplicationLogClient; use tokio::task::JoinSet; use tonic::transport::Channel; @@ -68,10 +69,11 @@ impl ConfigureNamespace for ReplicaConfigurator { &db_path, meta_store_handle.clone(), store.clone(), + WalFlavor::Sqlite, ) .await?; let applied_frame_no_receiver = client.current_frame_no_notifier.subscribe(); - let mut replicator = libsql_replication::replicator::Replicator::new( + let mut replicator = libsql_replication::replicator::Replicator::new_sqlite( client, db_path.join("data"), DEFAULT_AUTO_CHECKPOINT, diff --git a/libsql-server/src/replication/replicator_client.rs b/libsql-server/src/replication/replicator_client.rs index 89e465053b..753baac996 100644 --- a/libsql-server/src/replication/replicator_client.rs +++ b/libsql-server/src/replication/replicator_client.rs @@ -6,7 +6,7 @@ use chrono::{DateTime, Utc}; use futures::TryStreamExt; use libsql_replication::meta::WalIndexMeta; use libsql_replication::replicator::{Error, ReplicatorClient}; -use libsql_replication::rpc::replication::hello_request::WalFlavor; +use libsql_replication::rpc::replication::log_offset::WalFlavor; use libsql_replication::rpc::replication::replication_log_client::ReplicationLogClient; use libsql_replication::rpc::replication::{ verify_session_token, Frame as RpcFrame, HelloRequest, LogOffset, NAMESPACE_METADATA_KEY, @@ -101,7 +101,7 @@ impl ReplicatorClient for Client { #[tracing::instrument(skip(self))] async fn handshake(&mut self) -> Result<(), Error> { tracing::debug!("Attempting to perform handshake with primary."); - let req = self.make_request(HelloRequest::new(self.wal_flavor)); + let req = self.make_request(HelloRequest::new()); let resp = self.client.hello(req).await?; let hello = resp.into_inner(); verify_session_token(&hello.session_token).map_err(Error::Client)?; @@ -143,6 +143,7 @@ impl 
ReplicatorClient for Client { async fn next_frames(&mut self) -> Result { let offset = LogOffset { next_offset: self.next_frame_no(), + wal_flavor: Some(self.wal_flavor.into()), }; let req = self.make_request(offset); let stream = self @@ -178,6 +179,7 @@ impl ReplicatorClient for Client { async fn snapshot(&mut self) -> Result { let offset = LogOffset { next_offset: self.next_frame_no(), + wal_flavor: Some(self.wal_flavor.into()), }; let req = self.make_request(offset); match self.client.snapshot(req).await { diff --git a/libsql-server/src/rpc/replication_log.rs b/libsql-server/src/rpc/replication_log.rs index bf1840a5c6..1ef306daf1 100644 --- a/libsql-server/src/rpc/replication_log.rs +++ b/libsql-server/src/rpc/replication_log.rs @@ -8,7 +8,7 @@ use chrono::{DateTime, Utc}; use futures::stream::BoxStream; use futures_core::Future; pub use libsql_replication::rpc::replication as rpc; -use libsql_replication::rpc::replication::hello_request::WalFlavor; +use libsql_replication::rpc::replication::log_offset::WalFlavor; use libsql_replication::rpc::replication::replication_log_server::ReplicationLog; use libsql_replication::rpc::replication::{ Frame, Frames, HelloRequest, HelloResponse, LogOffset, NAMESPACE_DOESNT_EXIST, @@ -260,6 +260,9 @@ impl ReplicationLog for ReplicationLogService { &self, req: tonic::Request, ) -> Result, Status> { + if let WalFlavor::Libsql = req.get_ref().wal_flavor() { + return Err(Status::invalid_argument("libsql wal not supported")); + } let namespace = super::extract_namespace(self.disable_namespaces, &req)?; self.authenticate(&req, namespace.clone()).await?; @@ -305,6 +308,9 @@ impl ReplicationLog for ReplicationLogService { &self, req: tonic::Request, ) -> Result, Status> { + if let WalFlavor::Libsql = req.get_ref().wal_flavor() { + return Err(Status::invalid_argument("libsql wal not supported")); + } let namespace = super::extract_namespace(self.disable_namespaces, &req)?; self.authenticate(&req, namespace.clone()).await?; @@ -355,11 
+361,6 @@ impl ReplicationLog for ReplicationLogService { guard.insert((replica_addr, namespace.clone())); } } - - if let WalFlavor::Libsql = req.get_ref().wal_flavor() { - return Err(Status::invalid_argument("libsql wal not supported")); - } - let (logger, config, version, _, _) = self.logger_from_namespace(namespace, &req, false).await?; @@ -381,7 +382,12 @@ impl ReplicationLog for ReplicationLogService { &self, req: tonic::Request, ) -> Result, Status> { + if let WalFlavor::Libsql = req.get_ref().wal_flavor() { + return Err(Status::invalid_argument("libsql wal not supported")); + } + let namespace = super::extract_namespace(self.disable_namespaces, &req)?; + self.authenticate(&req, namespace.clone()).await?; let (logger, _, _, stats, _) = self.logger_from_namespace(namespace, &req, true).await?; diff --git a/libsql/src/replication/remote_client.rs b/libsql/src/replication/remote_client.rs index 26e537d18a..864392ddb5 100644 --- a/libsql/src/replication/remote_client.rs +++ b/libsql/src/replication/remote_client.rs @@ -8,7 +8,6 @@ use futures::{StreamExt as _, TryStreamExt}; use libsql_replication::frame::{FrameHeader, FrameNo}; use libsql_replication::meta::WalIndexMeta; use libsql_replication::replicator::{Error, ReplicatorClient}; -use libsql_replication::rpc::replication::hello_request::WalFlavor; use libsql_replication::rpc::replication::{ Frame as RpcFrame, verify_session_token, Frames, HelloRequest, HelloResponse, LogOffset, SESSION_TOKEN_KEY, }; @@ -117,9 +116,10 @@ impl RemoteClient { self.dirty = false; } let prefetch = self.session_token.is_some(); - let hello_req = self.make_request(HelloRequest::new(WalFlavor::Sqlite)); + let hello_req = self.make_request(HelloRequest::new()); let log_offset_req = self.make_request(LogOffset { next_offset: self.next_offset(), + wal_flavor: None, }); let mut client_clone = self.remote.clone(); let hello_fut = time(async { @@ -179,6 +179,7 @@ impl RemoteClient { None => { let req = self.make_request(LogOffset { 
next_offset: self.next_offset(), + wal_flavor: None, }); time(self.remote.replication.batch_log_entries(req)).await } @@ -190,6 +191,7 @@ impl RemoteClient { async fn do_snapshot(&mut self) -> Result<::FrameStream, Error> { let req = self.make_request(LogOffset { next_offset: self.next_offset(), + wal_flavor: None, }); let mut frames = self .remote From 9143039ab595be41d0400b808c7871b9ef1de6b4 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Fri, 9 Aug 2024 16:32:41 +0200 Subject: [PATCH 46/50] libsql rpc replicator --- Cargo.lock | 651 ++------- .../src/injector/libsql_injector.rs | 7 + libsql-replication/src/rpc.rs | 44 + libsql-server/Cargo.toml | 7 +- .../src/connection/connection_core.rs | 809 +++++++++++ .../src/connection/connection_manager.rs | 15 +- libsql-server/src/connection/legacy.rs | 456 +++++++ libsql-server/src/connection/libsql.rs | 1206 +---------------- libsql-server/src/connection/mod.rs | 14 +- libsql-server/src/connection/program.rs | 13 +- libsql-server/src/connection/write_proxy.rs | 74 +- libsql-server/src/database/libsql_primary.rs | 23 + libsql-server/src/database/libsql_replica.rs | 31 + libsql-server/src/database/mod.rs | 122 +- libsql-server/src/database/primary.rs | 6 +- libsql-server/src/database/replica.rs | 8 +- libsql-server/src/database/schema.rs | 88 +- libsql-server/src/hrana/batch.rs | 3 +- libsql-server/src/http/user/dump.rs | 5 +- libsql-server/src/http/user/mod.rs | 3 +- libsql-server/src/lib.rs | 255 +++- libsql-server/src/main.rs | 1 + .../src/namespace/configurator/fork.rs | 6 +- .../src/namespace/configurator/helpers.rs | 10 +- .../namespace/configurator/libsql_primary.rs | 241 ++++ .../namespace/configurator/libsql_replica.rs | 281 ++++ .../namespace/configurator/libsql_schema.rs | 168 +++ .../configurator/libsql_wal_replica.rs | 139 -- .../src/namespace/configurator/mod.rs | 9 +- .../src/namespace/configurator/primary.rs | 8 +- .../src/namespace/configurator/replica.rs | 29 +- .../src/namespace/configurator/schema.rs | 10 
+- libsql-server/src/namespace/meta_store.rs | 2 +- libsql-server/src/namespace/name.rs | 4 + libsql-server/src/query_result_builder.rs | 5 +- .../src/replication/primary/logger.rs | 2 +- .../primary/replication_logger_wal.rs | 2 +- .../src/replication/snapshot_store.rs | 2 +- libsql-server/src/rpc/mod.rs | 25 +- libsql-server/src/rpc/proxy.rs | 2 +- libsql-server/src/rpc/replication/auth.rs | 39 + .../src/rpc/replication/libsql_replicator.rs | 217 +++ libsql-server/src/rpc/replication/mod.rs | 5 + .../rpc/{ => replication}/replication_log.rs | 48 +- .../replication_log_proxy.rs | 0 libsql-server/src/rpc/streaming_exec.rs | 18 +- libsql-server/src/schema/migration.rs | 2 +- libsql-server/src/schema/mod.rs | 4 +- libsql-server/src/schema/scheduler.rs | 40 +- libsql-server/src/test/bottomless.rs | 13 +- libsql-wal/src/checkpointer.rs | 1 + libsql-wal/src/io/file.rs | 87 ++ libsql-wal/src/io/mod.rs | 8 +- libsql-wal/src/lib.rs | 12 +- libsql-wal/src/registry.rs | 40 +- libsql-wal/src/replication/injector.rs | 5 +- libsql-wal/src/replication/replicator.rs | 18 +- libsql-wal/src/replication/storage.rs | 4 +- libsql-wal/src/segment/compacted.rs | 11 + libsql-wal/src/segment/current.rs | 7 + libsql-wal/src/segment/list.rs | 71 +- libsql-wal/src/segment/mod.rs | 3 +- libsql-wal/src/shared_wal.rs | 29 +- libsql-wal/src/storage/async_storage.rs | 52 +- libsql-wal/src/storage/backend/mod.rs | 22 +- libsql-wal/src/storage/backend/s3.rs | 10 +- libsql-wal/src/storage/job.rs | 55 +- libsql-wal/src/storage/mod.rs | 196 ++- libsql-wal/src/storage/scheduler.rs | 24 +- libsql-wal/tests/flaky_fs.rs | 4 - 70 files changed, 3497 insertions(+), 2334 deletions(-) create mode 100644 libsql-server/src/connection/connection_core.rs create mode 100644 libsql-server/src/connection/legacy.rs create mode 100644 libsql-server/src/database/libsql_primary.rs create mode 100644 libsql-server/src/database/libsql_replica.rs create mode 100644 libsql-server/src/namespace/configurator/libsql_primary.rs 
create mode 100644 libsql-server/src/namespace/configurator/libsql_replica.rs create mode 100644 libsql-server/src/namespace/configurator/libsql_schema.rs delete mode 100644 libsql-server/src/namespace/configurator/libsql_wal_replica.rs create mode 100644 libsql-server/src/rpc/replication/auth.rs create mode 100644 libsql-server/src/rpc/replication/libsql_replicator.rs create mode 100644 libsql-server/src/rpc/replication/mod.rs rename libsql-server/src/rpc/{ => replication}/replication_log.rs (89%) rename libsql-server/src/rpc/{ => replication}/replication_log_proxy.rs (100%) diff --git a/Cargo.lock b/Cargo.lock index 7e19e03e9a..3579f76ec7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -321,60 +321,30 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" -[[package]] -name = "aws-config" -version = "0.55.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcdcf0d683fe9c23d32cf5b53c9918ea0a500375a9fb20109802552658e576c9" -dependencies = [ - "aws-credential-types 0.55.3", - "aws-http", - "aws-sdk-sso 0.28.0", - "aws-sdk-sts 0.28.0", - "aws-smithy-async 0.55.3", - "aws-smithy-client", - "aws-smithy-http 0.55.3", - "aws-smithy-http-tower", - "aws-smithy-json 0.55.3", - "aws-smithy-types 0.55.3", - "aws-types 0.55.3", - "bytes", - "fastrand 1.9.0", - "hex", - "http 0.2.12", - "hyper 0.14.30", - "ring 0.16.20", - "time", - "tokio", - "tower", - "tracing", - "zeroize", -] - [[package]] name = "aws-config" version = "1.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "caf6cfe2881cb1fcbba9ae946fb9a6480d3b7a714ca84c74925014a89ef3387a" dependencies = [ - "aws-credential-types 1.2.0", + "aws-credential-types", "aws-runtime", - "aws-sdk-sso 1.34.0", + "aws-sdk-sso", "aws-sdk-ssooidc", - "aws-sdk-sts 1.34.0", - "aws-smithy-async 1.2.1", - "aws-smithy-http 0.60.9", - "aws-smithy-json 0.60.7", + "aws-sdk-sts", + 
"aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", "aws-smithy-runtime", "aws-smithy-runtime-api", - "aws-smithy-types 1.2.0", - "aws-types 1.3.3", + "aws-smithy-types", + "aws-types", "bytes", - "fastrand 2.1.0", + "fastrand", "hex", "http 0.2.12", "hyper 0.14.30", - "ring 0.17.8", + "ring", "time", "tokio", "tracing", @@ -382,81 +352,34 @@ dependencies = [ "zeroize", ] -[[package]] -name = "aws-credential-types" -version = "0.55.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fcdb2f7acbc076ff5ad05e7864bdb191ca70a6fd07668dc3a1a8bcd051de5ae" -dependencies = [ - "aws-smithy-async 0.55.3", - "aws-smithy-types 0.55.3", - "fastrand 1.9.0", - "tokio", - "tracing", - "zeroize", -] - [[package]] name = "aws-credential-types" version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e16838e6c9e12125face1c1eff1343c75e3ff540de98ff7ebd61874a89bcfeb9" dependencies = [ - "aws-smithy-async 1.2.1", + "aws-smithy-async", "aws-smithy-runtime-api", - "aws-smithy-types 1.2.0", + "aws-smithy-types", "zeroize", ] -[[package]] -name = "aws-endpoint" -version = "0.55.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8cce1c41a6cfaa726adee9ebb9a56fcd2bbfd8be49fd8a04c5e20fd968330b04" -dependencies = [ - "aws-smithy-http 0.55.3", - "aws-smithy-types 0.55.3", - "aws-types 0.55.3", - "http 0.2.12", - "regex", - "tracing", -] - -[[package]] -name = "aws-http" -version = "0.55.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aadbc44e7a8f3e71c8b374e03ecd972869eb91dd2bc89ed018954a52ba84bc44" -dependencies = [ - "aws-credential-types 0.55.3", - "aws-smithy-http 0.55.3", - "aws-smithy-types 0.55.3", - "aws-types 0.55.3", - "bytes", - "http 0.2.12", - "http-body 0.4.6", - "lazy_static", - "percent-encoding", - "pin-project-lite", - "tracing", -] - [[package]] name = "aws-runtime" version = "1.3.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "87c5f920ffd1e0526ec9e70e50bf444db50b204395a0fa7016bbf9e31ea1698f" dependencies = [ - "aws-credential-types 1.2.0", - "aws-sigv4 1.2.3", - "aws-smithy-async 1.2.1", - "aws-smithy-eventstream 0.60.4", - "aws-smithy-http 0.60.9", + "aws-credential-types", + "aws-sigv4", + "aws-smithy-async", + "aws-smithy-eventstream", + "aws-smithy-http", "aws-smithy-runtime-api", - "aws-smithy-types 1.2.0", - "aws-types 1.3.3", + "aws-smithy-types", + "aws-types", "bytes", - "fastrand 2.1.0", + "fastrand", "http 0.2.12", "http-body 0.4.6", "percent-encoding", @@ -465,39 +388,6 @@ dependencies = [ "uuid", ] -[[package]] -name = "aws-sdk-s3" -version = "0.28.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fba197193cbb4bcb6aad8d99796b2291f36fa89562ded5d4501363055b0de89f" -dependencies = [ - "aws-credential-types 0.55.3", - "aws-endpoint", - "aws-http", - "aws-sig-auth", - "aws-sigv4 0.55.3", - "aws-smithy-async 0.55.3", - "aws-smithy-checksums 0.55.3", - "aws-smithy-client", - "aws-smithy-eventstream 0.55.3", - "aws-smithy-http 0.55.3", - "aws-smithy-http-tower", - "aws-smithy-json 0.55.3", - "aws-smithy-types 0.55.3", - "aws-smithy-xml 0.55.3", - "aws-types 0.55.3", - "bytes", - "http 0.2.12", - "http-body 0.4.6", - "once_cell", - "percent-encoding", - "regex", - "tokio-stream", - "tower", - "tracing", - "url", -] - [[package]] name = "aws-sdk-s3" version = "1.40.0" @@ -505,21 +395,21 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8367c403fdf27690684b926a46ed9524099a69dd5dfcef62028bf4096b5b809f" dependencies = [ "ahash", - "aws-credential-types 1.2.0", + "aws-credential-types", "aws-runtime", - "aws-sigv4 1.2.3", - "aws-smithy-async 1.2.1", - "aws-smithy-checksums 0.60.11", - "aws-smithy-eventstream 0.60.4", - "aws-smithy-http 0.60.9", - "aws-smithy-json 0.60.7", + "aws-sigv4", + "aws-smithy-async", + "aws-smithy-checksums", + "aws-smithy-eventstream", 
+ "aws-smithy-http", + "aws-smithy-json", "aws-smithy-runtime", "aws-smithy-runtime-api", - "aws-smithy-types 1.2.0", - "aws-smithy-xml 0.60.8", - "aws-types 1.3.3", + "aws-smithy-types", + "aws-smithy-xml", + "aws-types", "bytes", - "fastrand 2.1.0", + "fastrand", "hex", "hmac", "http 0.2.12", @@ -533,46 +423,21 @@ dependencies = [ "url", ] -[[package]] -name = "aws-sdk-sso" -version = "0.28.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8b812340d86d4a766b2ca73f740dfd47a97c2dff0c06c8517a16d88241957e4" -dependencies = [ - "aws-credential-types 0.55.3", - "aws-endpoint", - "aws-http", - "aws-sig-auth", - "aws-smithy-async 0.55.3", - "aws-smithy-client", - "aws-smithy-http 0.55.3", - "aws-smithy-http-tower", - "aws-smithy-json 0.55.3", - "aws-smithy-types 0.55.3", - "aws-types 0.55.3", - "bytes", - "http 0.2.12", - "regex", - "tokio-stream", - "tower", - "tracing", -] - [[package]] name = "aws-sdk-sso" version = "1.34.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cdcfae7bf8b8f14cade7579ffa8956fcee91dc23633671096b4b5de7d16f682a" dependencies = [ - "aws-credential-types 1.2.0", + "aws-credential-types", "aws-runtime", - "aws-smithy-async 1.2.1", - "aws-smithy-http 0.60.9", - "aws-smithy-json 0.60.7", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", "aws-smithy-runtime", "aws-smithy-runtime-api", - "aws-smithy-types 1.2.0", - "aws-types 1.3.3", + "aws-smithy-types", + "aws-types", "bytes", "http 0.2.12", "once_cell", @@ -586,15 +451,15 @@ version = "1.35.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33b30def8f02ba81276d5dbc22e7bf3bed20d62d1b175eef82680d6bdc7a6f4c" dependencies = [ - "aws-credential-types 1.2.0", + "aws-credential-types", "aws-runtime", - "aws-smithy-async 1.2.1", - "aws-smithy-http 0.60.9", - "aws-smithy-json 0.60.7", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", "aws-smithy-runtime", "aws-smithy-runtime-api", - 
"aws-smithy-types 1.2.0", - "aws-types 1.3.3", + "aws-smithy-types", + "aws-types", "bytes", "http 0.2.12", "once_cell", @@ -602,102 +467,40 @@ dependencies = [ "tracing", ] -[[package]] -name = "aws-sdk-sts" -version = "0.28.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "265fac131fbfc188e5c3d96652ea90ecc676a934e3174eaaee523c6cec040b3b" -dependencies = [ - "aws-credential-types 0.55.3", - "aws-endpoint", - "aws-http", - "aws-sig-auth", - "aws-smithy-async 0.55.3", - "aws-smithy-client", - "aws-smithy-http 0.55.3", - "aws-smithy-http-tower", - "aws-smithy-json 0.55.3", - "aws-smithy-query 0.55.3", - "aws-smithy-types 0.55.3", - "aws-smithy-xml 0.55.3", - "aws-types 0.55.3", - "bytes", - "http 0.2.12", - "regex", - "tower", - "tracing", -] - [[package]] name = "aws-sdk-sts" version = "1.34.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0804f840ad31537d5d1a4ec48d59de5e674ad05f1db7d3def2c9acadaf1f7e60" dependencies = [ - "aws-credential-types 1.2.0", + "aws-credential-types", "aws-runtime", - "aws-smithy-async 1.2.1", - "aws-smithy-http 0.60.9", - "aws-smithy-json 0.60.7", - "aws-smithy-query 0.60.7", + "aws-smithy-async", + "aws-smithy-http", + "aws-smithy-json", + "aws-smithy-query", "aws-smithy-runtime", "aws-smithy-runtime-api", - "aws-smithy-types 1.2.0", - "aws-smithy-xml 0.60.8", - "aws-types 1.3.3", + "aws-smithy-types", + "aws-smithy-xml", + "aws-types", "http 0.2.12", "once_cell", "regex-lite", "tracing", ] -[[package]] -name = "aws-sig-auth" -version = "0.55.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b94acb10af0c879ecd5c7bdf51cda6679a0a4f4643ce630905a77673bfa3c61" -dependencies = [ - "aws-credential-types 0.55.3", - "aws-sigv4 0.55.3", - "aws-smithy-eventstream 0.55.3", - "aws-smithy-http 0.55.3", - "aws-types 0.55.3", - "http 0.2.12", - "tracing", -] - -[[package]] -name = "aws-sigv4" -version = "0.55.3" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d2ce6f507be68e968a33485ced670111d1cbad161ddbbab1e313c03d37d8f4c" -dependencies = [ - "aws-smithy-eventstream 0.55.3", - "aws-smithy-http 0.55.3", - "bytes", - "form_urlencoded", - "hex", - "hmac", - "http 0.2.12", - "once_cell", - "percent-encoding", - "regex", - "sha2", - "time", - "tracing", -] - [[package]] name = "aws-sigv4" version = "1.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5df1b0fa6be58efe9d4ccc257df0a53b89cd8909e86591a13ca54817c87517be" dependencies = [ - "aws-credential-types 1.2.0", - "aws-smithy-eventstream 0.60.4", - "aws-smithy-http 0.60.9", + "aws-credential-types", + "aws-smithy-eventstream", + "aws-smithy-http", "aws-smithy-runtime-api", - "aws-smithy-types 1.2.0", + "aws-smithy-types", "bytes", "crypto-bigint 0.5.5", "form_urlencoded", @@ -708,7 +511,7 @@ dependencies = [ "once_cell", "p256", "percent-encoding", - "ring 0.17.8", + "ring", "sha2", "subtle", "time", @@ -716,18 +519,6 @@ dependencies = [ "zeroize", ] -[[package]] -name = "aws-smithy-async" -version = "0.55.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "13bda3996044c202d75b91afeb11a9afae9db9a721c6a7a427410018e286b880" -dependencies = [ - "futures-util", - "pin-project-lite", - "tokio", - "tokio-stream", -] - [[package]] name = "aws-smithy-async" version = "1.2.1" @@ -739,35 +530,14 @@ dependencies = [ "tokio", ] -[[package]] -name = "aws-smithy-checksums" -version = "0.55.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07ed8b96d95402f3f6b8b57eb4e0e45ee365f78b1a924faf20ff6e97abf1eae6" -dependencies = [ - "aws-smithy-http 0.55.3", - "aws-smithy-types 0.55.3", - "bytes", - "crc32c", - "crc32fast", - "hex", - "http 0.2.12", - "http-body 0.4.6", - "md-5", - "pin-project-lite", - "sha1", - "sha2", - "tracing", -] - [[package]] name = "aws-smithy-checksums" version = "0.60.11" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "48c4134cf3adaeacff34d588dbe814200357b0c466d730cf1c0d8054384a2de4" dependencies = [ - "aws-smithy-http 0.60.9", - "aws-smithy-types 1.2.0", + "aws-smithy-http", + "aws-smithy-types", "bytes", "crc32c", "crc32fast", @@ -781,84 +551,26 @@ dependencies = [ "tracing", ] -[[package]] -name = "aws-smithy-client" -version = "0.55.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a86aa6e21e86c4252ad6a0e3e74da9617295d8d6e374d552be7d3059c41cedd" -dependencies = [ - "aws-smithy-async 0.55.3", - "aws-smithy-http 0.55.3", - "aws-smithy-http-tower", - "aws-smithy-types 0.55.3", - "bytes", - "fastrand 1.9.0", - "http 0.2.12", - "http-body 0.4.6", - "hyper 0.14.30", - "hyper-rustls 0.23.2", - "lazy_static", - "pin-project-lite", - "rustls 0.20.9", - "tokio", - "tower", - "tracing", -] - -[[package]] -name = "aws-smithy-eventstream" -version = "0.55.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "460c8da5110835e3d9a717c61f5556b20d03c32a1dec57f8fc559b360f733bb8" -dependencies = [ - "aws-smithy-types 0.55.3", - "bytes", - "crc32fast", -] - [[package]] name = "aws-smithy-eventstream" version = "0.60.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e6363078f927f612b970edf9d1903ef5cef9a64d1e8423525ebb1f0a1633c858" dependencies = [ - "aws-smithy-types 1.2.0", + "aws-smithy-types", "bytes", "crc32fast", ] -[[package]] -name = "aws-smithy-http" -version = "0.55.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b3b693869133551f135e1f2c77cb0b8277d9e3e17feaf2213f735857c4f0d28" -dependencies = [ - "aws-smithy-eventstream 0.55.3", - "aws-smithy-types 0.55.3", - "bytes", - "bytes-utils", - "futures-core", - "http 0.2.12", - "http-body 0.4.6", - "hyper 0.14.30", - "once_cell", - "percent-encoding", - "pin-project-lite", - "pin-utils", - "tokio", - "tokio-util", - "tracing", -] - [[package]] name = 
"aws-smithy-http" version = "0.60.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9cd0ae3d97daa0a2bf377a4d8e8e1362cae590c4a1aad0d40058ebca18eb91e" dependencies = [ - "aws-smithy-eventstream 0.60.4", + "aws-smithy-eventstream", "aws-smithy-runtime-api", - "aws-smithy-types 1.2.0", + "aws-smithy-types", "bytes", "bytes-utils", "futures-core", @@ -871,48 +583,13 @@ dependencies = [ "tracing", ] -[[package]] -name = "aws-smithy-http-tower" -version = "0.55.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ae4f6c5798a247fac98a867698197d9ac22643596dc3777f0c76b91917616b9" -dependencies = [ - "aws-smithy-http 0.55.3", - "aws-smithy-types 0.55.3", - "bytes", - "http 0.2.12", - "http-body 0.4.6", - "pin-project-lite", - "tower", - "tracing", -] - -[[package]] -name = "aws-smithy-json" -version = "0.55.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "23f9f42fbfa96d095194a632fbac19f60077748eba536eb0b9fecc28659807f8" -dependencies = [ - "aws-smithy-types 0.55.3", -] - [[package]] name = "aws-smithy-json" version = "0.60.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4683df9469ef09468dad3473d129960119a0d3593617542b7d52086c8486f2d6" dependencies = [ - "aws-smithy-types 1.2.0", -] - -[[package]] -name = "aws-smithy-query" -version = "0.55.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "98819eb0b04020a1c791903533b638534ae6c12e2aceda3e6e6fba015608d51d" -dependencies = [ - "aws-smithy-types 0.55.3", - "urlencoding", + "aws-smithy-types", ] [[package]] @@ -921,7 +598,7 @@ version = "0.60.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f2fbd61ceb3fe8a1cb7352e42689cec5335833cd9f94103a61e98f9bb61c64bb" dependencies = [ - "aws-smithy-types 1.2.0", + "aws-smithy-types", "urlencoding", ] @@ -931,12 +608,12 @@ version = "1.6.2" source = "registry+https://github.com/rust-lang/crates.io-index" 
checksum = "ce87155eba55e11768b8c1afa607f3e864ae82f03caf63258b37455b0ad02537" dependencies = [ - "aws-smithy-async 1.2.1", - "aws-smithy-http 0.60.9", + "aws-smithy-async", + "aws-smithy-http", "aws-smithy-runtime-api", - "aws-smithy-types 1.2.0", + "aws-smithy-types", "bytes", - "fastrand 2.1.0", + "fastrand", "h2", "http 0.2.12", "http-body 0.4.6", @@ -958,8 +635,8 @@ version = "1.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "30819352ed0a04ecf6a2f3477e344d2d1ba33d43e0f09ad9047c12e0d923616f" dependencies = [ - "aws-smithy-async 1.2.1", - "aws-smithy-types 1.2.0", + "aws-smithy-async", + "aws-smithy-types", "bytes", "http 0.2.12", "http 1.1.0", @@ -969,19 +646,6 @@ dependencies = [ "zeroize", ] -[[package]] -name = "aws-smithy-types" -version = "0.55.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "16a3d0bf4f324f4ef9793b86a1701d9700fbcdbd12a846da45eed104c634c6e8" -dependencies = [ - "base64-simd", - "itoa", - "num-integer", - "ryu", - "time", -] - [[package]] name = "aws-smithy-types" version = "1.2.0" @@ -1014,19 +678,10 @@ version = "0.60.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7f280f434214856abace637b1f944d50ccca216814813acd195cdd7f206ce17f" dependencies = [ - "aws-smithy-types 1.2.0", + "aws-smithy-types", "time", ] -[[package]] -name = "aws-smithy-xml" -version = "0.55.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b1b9d12875731bd07e767be7baad95700c3137b56730ec9ddeedb52a5e5ca63b" -dependencies = [ - "xmlparser", -] - [[package]] name = "aws-smithy-xml" version = "0.60.8" @@ -1036,32 +691,16 @@ dependencies = [ "xmlparser", ] -[[package]] -name = "aws-types" -version = "0.55.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6dd209616cc8d7bfb82f87811a5c655dc97537f592689b18743bddf5dc5c4829" -dependencies = [ - "aws-credential-types 0.55.3", - "aws-smithy-async 0.55.3", - 
"aws-smithy-client", - "aws-smithy-http 0.55.3", - "aws-smithy-types 0.55.3", - "http 0.2.12", - "rustc_version", - "tracing", -] - [[package]] name = "aws-types" version = "1.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5221b91b3e441e6675310829fd8984801b772cb1546ef6c0e54dec9f1ac13fef" dependencies = [ - "aws-credential-types 1.2.0", - "aws-smithy-async 1.2.1", + "aws-credential-types", + "aws-smithy-async", "aws-smithy-runtime-api", - "aws-smithy-types 1.2.0", + "aws-smithy-types", "rustc_version", "tracing", ] @@ -1303,8 +942,8 @@ dependencies = [ "anyhow", "arc-swap", "async-compression 0.4.11", - "aws-config 1.5.4", - "aws-sdk-s3 1.40.0", + "aws-config", + "aws-sdk-s3", "bytes", "chrono", "futures-core", @@ -1325,9 +964,9 @@ version = "0.1.14" dependencies = [ "anyhow", "async-compression 0.4.11", - "aws-config 1.5.4", - "aws-sdk-s3 1.40.0", - "aws-smithy-types 1.2.0", + "aws-config", + "aws-sdk-s3", + "aws-smithy-types", "bottomless", "bytes", "chrono", @@ -2383,15 +2022,6 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7360491ce676a36bf9bb3c56c1aa791658183a54d2744120f27285738d90465a" -[[package]] -name = "fastrand" -version = "1.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e51093e27b0797c359783294ca4f0a911c270184cb10f85783b118614a1501be" -dependencies = [ - "instant", -] - [[package]] name = "fastrand" version = "2.1.0" @@ -3016,21 +2646,6 @@ dependencies = [ "tokio", ] -[[package]] -name = "hyper-rustls" -version = "0.23.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1788965e61b367cd03a62950836d5cd41560c3577d90e40e0819373194d1661c" -dependencies = [ - "http 0.2.12", - "hyper 0.14.30", - "log", - "rustls 0.20.9", - "rustls-native-certs 0.6.3", - "tokio", - "tokio-rustls 0.23.4", -] - [[package]] name = "hyper-rustls" version = "0.24.1" @@ -3223,15 +2838,6 @@ dependencies = [ "similar", ] -[[package]] 
-name = "instant" -version = "0.1.13" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0242819d153cba4b4b05a5a8f2a7e9bbf97b6055b2a002b395c96b5ff3c0222" -dependencies = [ - "cfg-if", -] - [[package]] name = "io-extras" version = "0.17.4" @@ -3362,7 +2968,7 @@ dependencies = [ "base64 0.21.7", "js-sys", "pem", - "ring 0.17.8", + "ring", "serde", "serde_json", "simple_asn1", @@ -3542,8 +3148,9 @@ dependencies = [ "async-stream", "async-tempfile", "async-trait", - "aws-config 0.55.3", - "aws-sdk-s3 0.28.0", + "aws-config", + "aws-sdk-s3", + "aws-smithy-runtime", "axum", "axum-extra", "base64 0.21.7", @@ -3599,7 +3206,7 @@ dependencies = [ "regex", "reqwest", "rheaper", - "ring 0.17.8", + "ring", "rustls 0.21.12", "rustls-pemfile 1.0.4", "s3s 0.8.1", @@ -3704,9 +3311,9 @@ dependencies = [ "arc-swap", "async-lock 3.4.0", "async-stream", - "aws-config 1.5.4", - "aws-credential-types 1.2.0", - "aws-sdk-s3 1.40.0", + "aws-config", + "aws-credential-types", + "aws-sdk-s3", "aws-smithy-runtime", "bitflags 2.6.0", "bytes", @@ -5011,21 +4618,6 @@ dependencies = [ "zerocopy", ] -[[package]] -name = "ring" -version = "0.16.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3053cf52e236a3ed746dfc745aa9cacf1b791d846bdaf412f60a8d7d6e17c8fc" -dependencies = [ - "cc", - "libc", - "once_cell", - "spin 0.5.2", - "untrusted 0.7.1", - "web-sys", - "winapi", -] - [[package]] name = "ring" version = "0.17.8" @@ -5036,8 +4628,8 @@ dependencies = [ "cfg-if", "getrandom", "libc", - "spin 0.9.8", - "untrusted 0.9.0", + "spin", + "untrusted", "windows-sys 0.52.0", ] @@ -5101,18 +4693,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "rustls" -version = "0.20.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b80e3dec595989ea8510028f30c408a4630db12c9cbb8de34203b89d6577e99" -dependencies = [ - "log", - "ring 0.16.20", - "sct", - "webpki", -] - [[package]] name = "rustls" version = 
"0.21.12" @@ -5120,7 +4700,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f56a14d1f48b391359b22f731fd4bd7e43c97f3c50eee276f3aa09c94784d3e" dependencies = [ "log", - "ring 0.17.8", + "ring", "rustls-webpki 0.101.7", "sct", ] @@ -5132,7 +4712,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bf4ef73721ac7bcd79b2b315da7779d8fc09718c6b3d2d1b2d94850eb8c18432" dependencies = [ "log", - "ring 0.17.8", + "ring", "rustls-pki-types", "rustls-webpki 0.102.5", "subtle", @@ -5195,8 +4775,8 @@ version = "0.101.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b6275d1ee7a1cd780b64aca7726599a1dbc893b1e64144529e55c3c2f745765" dependencies = [ - "ring 0.17.8", - "untrusted 0.9.0", + "ring", + "untrusted", ] [[package]] @@ -5205,9 +4785,9 @@ version = "0.102.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f9a6fccd794a42c2c105b513a2f62bc3fd8f3ba57a4593677ceb0bd035164d78" dependencies = [ - "ring 0.17.8", + "ring", "rustls-pki-types", - "untrusted 0.9.0", + "untrusted", ] [[package]] @@ -5325,9 +4905,9 @@ version = "0.10.1-dev" source = "git+https://github.com/Nugine/s3s#29bf39cb72507505d09d9d7637f57784dbfc0a40" dependencies = [ "async-trait", - "aws-sdk-s3 1.40.0", + "aws-sdk-s3", "aws-smithy-runtime-api", - "aws-smithy-types 1.2.0", + "aws-smithy-types", "aws-smithy-types-convert", "hyper 1.4.1", "s3s 0.10.1-dev", @@ -5435,8 +5015,8 @@ version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "da046153aa2352493d6cb7da4b6e5c0c057d8a1d0a9aa8560baffdd945acd414" dependencies = [ - "ring 0.17.8", - "untrusted 0.9.0", + "ring", + "untrusted", ] [[package]] @@ -5758,12 +5338,6 @@ dependencies = [ "windows-sys 0.52.0", ] -[[package]] -name = "spin" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e63cff320ae2c57904679ba7cb63280a3dc4613885beafb148ee7bf9aa9042d" - [[package]] name 
= "spin" version = "0.9.8" @@ -5952,7 +5526,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "85b77fafb263dd9d05cbeac119526425676db3784113aa9295c88498cbf8bff1" dependencies = [ "cfg-if", - "fastrand 2.1.0", + "fastrand", "rustix 0.38.34", "windows-sys 0.52.0", ] @@ -6111,17 +5685,6 @@ dependencies = [ "syn 2.0.70", ] -[[package]] -name = "tokio-rustls" -version = "0.23.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c43ee83903113e03984cb9e5cebe6c04a5116269e900e3ddba8f068a62adda59" -dependencies = [ - "rustls 0.20.9", - "tokio", - "webpki", -] - [[package]] name = "tokio-rustls" version = "0.24.1" @@ -6591,12 +6154,6 @@ version = "0.2.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c" -[[package]] -name = "untrusted" -version = "0.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a156c684c91ea7d62626509bce3cb4e1d9ed5c4d978f7b4352658f96a4c26b4a" - [[package]] name = "untrusted" version = "0.9.0" @@ -7155,16 +6712,6 @@ dependencies = [ "wasm-bindgen", ] -[[package]] -name = "webpki" -version = "0.22.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ed63aea5ce73d0ff405984102c42de94fc55a6b75765d621c65262469b3c9b53" -dependencies = [ - "ring 0.17.8", - "untrusted 0.9.0", -] - [[package]] name = "webpki-roots" version = "0.25.4" diff --git a/libsql-replication/src/injector/libsql_injector.rs b/libsql-replication/src/injector/libsql_injector.rs index f867a29245..7c01522e1d 100644 --- a/libsql-replication/src/injector/libsql_injector.rs +++ b/libsql-replication/src/injector/libsql_injector.rs @@ -14,6 +14,12 @@ pub struct LibsqlInjector { injector: Injector, } +impl LibsqlInjector { + pub fn new(injector: Injector) -> Self { + Self { injector } + } +} + impl super::Injector for LibsqlInjector { async fn inject_frame(&mut self, frame: RpcFrame) -> 
Result> { // this is a bit annoying be we want to read the frame, and it has to be aligned, so we @@ -24,6 +30,7 @@ impl super::Injector for LibsqlInjector { todo!("invalid frame"); } wal_frame.as_bytes_mut().copy_from_slice(&frame.data[..]); + Ok(self .injector .insert_frame(wal_frame) diff --git a/libsql-replication/src/rpc.rs b/libsql-replication/src/rpc.rs index a538bc4c28..3b31bd2b21 100644 --- a/libsql-replication/src/rpc.rs +++ b/libsql-replication/src/rpc.rs @@ -23,9 +23,14 @@ pub mod proxy { pub mod replication { #![allow(clippy::all)] + use std::pin::Pin; use uuid::Uuid; + use tokio_stream::Stream; + pub type BoxStream<'a, T> = Pin + Send + 'a>>; + + use self::replication_log_server::ReplicationLog; include!("generated/wal_log.rs"); pub const NO_HELLO_ERROR_MSG: &str = "NO_HELLO"; @@ -53,6 +58,45 @@ pub mod replication { } } } + + pub type BoxReplicationService = Box>, + SnapshotStream = BoxStream<'static, Result>, + >>; + + #[tonic::async_trait] + impl ReplicationLog for BoxReplicationService { + type LogEntriesStream = BoxStream<'static, Result>; + type SnapshotStream = BoxStream<'static, Result>; + + async fn log_entries( + &self, + req: tonic::Request, + ) -> Result, tonic::Status> { + self.as_ref().log_entries(req).await + } + + async fn batch_log_entries( + &self, + req: tonic::Request, + ) -> Result, tonic::Status> { + self.as_ref().batch_log_entries(req).await + } + + async fn hello( + &self, + req: tonic::Request, + ) -> Result, tonic::Status> { + self.as_ref().hello(req).await + } + + async fn snapshot( + &self, + req: tonic::Request, + ) -> Result, tonic::Status> { + self.as_ref().snapshot(req).await + } + } } pub mod metadata { diff --git a/libsql-server/Cargo.toml b/libsql-server/Cargo.toml index 934a400786..c3b59f0766 100644 --- a/libsql-server/Cargo.toml +++ b/libsql-server/Cargo.toml @@ -36,7 +36,7 @@ hyper-tungstenite = "0.11" itertools = "0.10.5" jsonwebtoken = "9" libsql = { path = "../libsql/", optional = true } -libsql_replication = { 
path = "../libsql-replication" } +libsql_replication = { path = "../libsql-replication", features = ["libsql_wal"] } libsql-wal = { path = "../libsql-wal/" } libsql-storage = { path = "../libsql-storage", optional = true } metrics = "0.21.1" @@ -91,11 +91,12 @@ async-recursion = "1" mimalloc = "0.1.42" rheaper = { git = "https://github.com/MarinPostma/rheaper.git", tag = "v0.2.0", default-features = false, features = ["allocator"] } tar = "0.4.41" +aws-config = "1" +aws-sdk-s3 = "1" +aws-smithy-runtime = "1.6.2" [dev-dependencies] arbitrary = { version = "1.3.0", features = ["derive_arbitrary"] } -aws-config = "0.55" -aws-sdk-s3 = "0.28" env_logger = "0.10" hyper = { workspace = true, features = ["client"] } insta = { version = "1.26.0", features = ["json"] } diff --git a/libsql-server/src/connection/connection_core.rs b/libsql-server/src/connection/connection_core.rs new file mode 100644 index 0000000000..216348f102 --- /dev/null +++ b/libsql-server/src/connection/connection_core.rs @@ -0,0 +1,809 @@ +use std::ffi::{c_int, c_void}; +use std::time::{Duration, Instant}; +use std::sync::Arc; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::path::{Path, PathBuf}; + +use libsql_sys::wal::{WalManager, Wal}; +use metrics::histogram; +use parking_lot::Mutex; +use tokio::sync::watch; + +use crate::stats::{Stats, StatsUpdateMessage}; +use crate::replication::FrameNo; +use crate::query_result_builder::{QueryBuilderConfig, QueryResultBuilder}; +use crate::query_analysis::StmtKind; +use crate::namespace::ResolveNamespacePathFn; +use crate::namespace::meta_store::MetaStoreHandle; +use crate::namespace::broadcasters::BroadcasterHandle; +use crate::metrics::{PROGRAM_EXEC_COUNT, QUERY_CANCELED, VACUUM_COUNT, WAL_CHECKPOINT_COUNT}; +use crate::error::Error; +use crate::connection::legacy::open_conn_active_checkpoint; +use crate::{Result, BLOCKING_RT}; + +use super::config::DatabaseConfig; +use super::program::{DescribeCol, DescribeParam, DescribeResponse, Program, Vm}; + 
+/// The base connection type, shared between legacy and libsql-wal implementations +pub(super) struct CoreConnection { + conn: libsql_sys::Connection, + stats: Arc, + config_store: MetaStoreHandle, + builder_config: QueryBuilderConfig, + current_frame_no_receiver: watch::Receiver>, + block_writes: Arc, + resolve_attach_path: ResolveNamespacePathFn, + forced_rollback: bool, + broadcaster: BroadcasterHandle, + hooked: bool, + canceled: Arc, +} + +fn update_stats( + stats: &Stats, + sql: String, + rows_read: u64, + rows_written: u64, + mem_used: u64, + elapsed: Duration, +) { + stats.send(StatsUpdateMessage { + sql, + elapsed, + rows_read, + rows_written, + mem_used, + }); +} + +impl CoreConnection { + pub(super) fn new>( + path: &Path, + extensions: Arc<[PathBuf]>, + wal_manager: T, + stats: Arc, + broadcaster: BroadcasterHandle, + config_store: MetaStoreHandle, + builder_config: QueryBuilderConfig, + current_frame_no_receiver: watch::Receiver>, + block_writes: Arc, + resolve_attach_path: ResolveNamespacePathFn, + ) -> Result { + let conn = open_conn_active_checkpoint( + path, + wal_manager, + None, + builder_config.auto_checkpoint, + builder_config.encryption_config.clone(), + )?; + + let config = config_store.get(); + conn.pragma_update(None, "max_page_count", config.max_db_pages)?; + tracing::debug!("setting PRAGMA synchronous to {}", config.durability_mode); + conn.pragma_update(None, "synchronous", config.durability_mode)?; + + conn.set_limit( + rusqlite::limits::Limit::SQLITE_LIMIT_LENGTH, + config.max_row_size as i32, + ); + + unsafe { + const MAX_RETRIES: c_int = 8; + extern "C" fn do_nothing(_: *mut c_void, n: c_int) -> c_int { + (n < MAX_RETRIES) as _ + } + libsql_sys::ffi::sqlite3_busy_handler( + conn.handle(), + Some(do_nothing), + std::ptr::null_mut(), + ); + } + + let canceled = Arc::new(AtomicBool::new(false)); + + conn.progress_handler(100, { + let canceled = canceled.clone(); + Some(move || { + let canceled = canceled.load(Ordering::Relaxed); + if 
canceled { + QUERY_CANCELED.increment(1); + tracing::trace!("request canceled"); + } + canceled + }) + }); + + let this = Self { + conn, + stats, + config_store, + builder_config, + current_frame_no_receiver, + block_writes, + resolve_attach_path, + forced_rollback: false, + broadcaster, + hooked: false, + canceled, + }; + + for ext in extensions.iter() { + unsafe { + let _guard = rusqlite::LoadExtensionGuard::new(&this.conn).unwrap(); + if let Err(e) = this.conn.load_extension(ext, None) { + tracing::error!("failed to load extension: {}", ext.display()); + Err(e)?; + } + tracing::trace!("Loaded extension {}", ext.display()); + } + } + + Ok(this) + } + + pub(super) fn raw_mut(&mut self) -> &mut libsql_sys::Connection { + &mut self.conn + } + + pub(super) fn raw(&self) -> &libsql_sys::Connection { + &self.conn + } + + pub(super) fn config(&self) -> Arc{ + self.config_store.get() + } + + pub(super) async fn run_async( + this: Arc>, + pgm: Program, + builder: B, + ) -> Result { + struct Bomb { + canceled: Arc, + defused: bool, + } + + impl Drop for Bomb { + fn drop(&mut self) { + if !self.defused { + tracing::trace!("cancelling request"); + self.canceled.store(true, Ordering::Relaxed); + } + } + } + + let canceled = { + let cancelled = this.lock().canceled.clone(); + cancelled.store(false, Ordering::Relaxed); + cancelled + }; + + PROGRAM_EXEC_COUNT.increment(1); + + // create the bomb right before spawning the blocking task. 
+ let mut bomb = Bomb { + canceled, + defused: false, + }; + let ret = BLOCKING_RT + .spawn_blocking(move || CoreConnection::run(this, pgm, builder)) + .await + .unwrap(); + + bomb.defused = true; + + ret + } + + pub(super) fn run( + this: Arc>, + pgm: Program, + mut builder: B, + ) -> Result { + let (config, stats, block_writes, resolve_attach_path) = { + let mut lock = this.lock(); + let config = lock.config_store.get(); + let stats = lock.stats.clone(); + let block_writes = lock.block_writes.clone(); + let resolve_attach_path = lock.resolve_attach_path.clone(); + + lock.update_hooks(); + + (config, stats, block_writes, resolve_attach_path) + }; + + builder.init(&this.lock().builder_config)?; + let mut vm = Vm::new( + builder, + &pgm, + move |stmt_kind| { + let should_block = match stmt_kind { + StmtKind::Read | StmtKind::TxnBegin => config.block_reads, + StmtKind::Write => { + config.block_reads + || config.block_writes + || block_writes.load(Ordering::SeqCst) + } + StmtKind::DDL => config.block_reads || config.block_writes, + StmtKind::TxnEnd + | StmtKind::Release + | StmtKind::Savepoint + | StmtKind::Detach + | StmtKind::Attach(_) => false, + }; + + ( + should_block, + should_block.then(|| config.block_reason.clone()).flatten(), + ) + }, + move |sql, rows_read, rows_written, mem_used, elapsed| { + update_stats(&stats, sql, rows_read, rows_written, mem_used, elapsed) + }, + resolve_attach_path, + ); + + let mut has_timeout = false; + while !vm.finished() { + let mut conn = this.lock(); + + if conn.forced_rollback { + has_timeout = true; + conn.forced_rollback = false; + } + + // once there was a timeout, invalidate all the program steps + if has_timeout { + vm.builder().begin_step()?; + vm.builder().step_error(Error::LibSqlTxTimeout)?; + vm.builder().finish_step(0, None)?; + vm.advance(); + continue; + } + + vm.step(&conn.raw())?; + } + + { + let mut lock = this.lock(); + let is_autocommit = lock.conn.is_autocommit(); + let current_fno = 
*lock.current_frame_no_receiver.borrow_and_update(); + vm.builder().finish(current_fno, is_autocommit)?; + } + + Ok(vm.into_builder()) + } + + fn rollback(&self) { + if let Err(e) = self.conn.execute("ROLLBACK", ()) { + tracing::error!("failed to rollback: {e}"); + } + } + + pub(super) fn force_rollback(&mut self) { + if !self.forced_rollback { + self.rollback(); + self.forced_rollback = true; + } + } + + pub(super) fn checkpoint(&self) -> Result<()> { + let start = Instant::now(); + self.conn + .query_row("PRAGMA wal_checkpoint(TRUNCATE)", (), |row| { + let status: i32 = row.get(0)?; + let wal_frames: i32 = row.get(1)?; + let moved_frames: i32 = row.get(2)?; + tracing::info!( + "WAL checkpoint successful, status: {}, WAL frames: {}, moved frames: {}", + status, + wal_frames, + moved_frames + ); + Ok(()) + })?; + WAL_CHECKPOINT_COUNT.increment(1); + histogram!("libsql_server_wal_checkpoint_time", start.elapsed()); + Ok(()) + } + + pub(super) fn vacuum_if_needed(&self) -> Result<()> { + let page_count = self + .conn + .query_row("PRAGMA page_count", (), |row| row.get::<_, i64>(0))?; + let freelist_count = self + .conn + .query_row("PRAGMA freelist_count", (), |row| row.get::<_, i64>(0))?; + // NOTICE: don't bother vacuuming if we don't have at least 256MiB of data + if page_count >= 65536 && freelist_count * 2 > page_count { + tracing::info!("Vacuuming: pages={page_count} freelist={freelist_count}"); + self.conn.execute("VACUUM", ())?; + } else { + tracing::trace!("Not vacuuming: pages={page_count} freelist={freelist_count}"); + } + VACUUM_COUNT.increment(1); + Ok(()) + } + + pub(super) fn describe(&self, sql: &str) -> crate::Result { + let stmt = self.conn.prepare(sql)?; + + let params = (1..=stmt.parameter_count()) + .map(|param_i| { + let name = stmt.parameter_name(param_i).map(|n| n.into()); + DescribeParam { name } + }) + .collect(); + + let cols = stmt + .columns() + .into_iter() + .map(|col| { + let name = col.name().into(); + let decltype = 
col.decl_type().map(|t| t.into()); + DescribeCol { name, decltype } + }) + .collect(); + + let is_explain = stmt.is_explain() != 0; + let is_readonly = stmt.readonly(); + Ok(DescribeResponse { + params, + cols, + is_explain, + is_readonly, + }) + } + + pub(super) fn is_autocommit(&self) -> bool { + self.conn.is_autocommit() + } + + fn update_hooks(&mut self) { + let (update_fn, commit_fn, rollback_fn) = if self.hooked { + if self.broadcaster.active() { + return; + } + self.hooked = false; + (None, None, None) + } else { + let Some(broadcaster) = self.broadcaster.get() else { + return; + }; + + let update = broadcaster.clone(); + let update_fn = Some(move |action: _, _: &_, table: &_, _| { + update.notify(table, action); + }); + + let commit = broadcaster.clone(); + let commit_fn = Some(move || { + commit.commit(); + false // allow commit to go through + }); + + let rollback = broadcaster; + let rollback_fn = Some(move || rollback.rollback()); + (update_fn, commit_fn, rollback_fn) + }; + + self.conn.update_hook(update_fn); + self.conn.commit_hook(commit_fn); + self.conn.rollback_hook(rollback_fn); + } +} + +#[cfg(test)] +mod test { + use itertools::Itertools; + #[cfg(not(feature = "durable-wal"))] + use libsql_sys::wal::either::Either as EitherWAL; + #[cfg(feature = "durable-wal")] + use libsql_sys::wal::either::Either3 as EitherWAL; + use libsql_sys::wal::wrapper::PassthroughWalWrapper; + use libsql_sys::wal::{Sqlite3Wal, Sqlite3WalManager}; + use rand::Rng; + use tempfile::tempdir; + use tokio::task::JoinSet; + use tokio::time::Instant; + + use crate::auth::Authenticated; + use crate::connection::legacy::MakeLegacyConnection; + use crate::connection::{Connection as _, RequestContext, TXN_TIMEOUT}; + use crate::namespace::meta_store::{metastore_connection_maker, MetaStore}; + use crate::namespace::NamespaceName; + use crate::query_result_builder::test::{test_driver, TestBuilder}; + use crate::query_result_builder::QueryResultBuilder; + use 
crate::DEFAULT_AUTO_CHECKPOINT; + + use super::*; + + fn setup_test_conn() -> Arc>> { + let conn = CoreConnection { + conn: libsql_sys::Connection::test(), + stats: Arc::new(Stats::default()), + config_store: MetaStoreHandle::new_test(), + builder_config: QueryBuilderConfig::default(), + current_frame_no_receiver: watch::channel(None).1, + block_writes: Default::default(), + resolve_attach_path: Arc::new(|_| unreachable!()), + forced_rollback: false, + broadcaster: Default::default(), + hooked: false, + canceled: Arc::new(false.into()), + }; + + let conn = Arc::new(Mutex::new(conn)); + + let stmts = std::iter::once("create table test (x)") + .chain(std::iter::repeat("insert into test values ('hello world')").take(100)) + .collect_vec(); + CoreConnection::run(conn.clone(), Program::seq(&stmts), TestBuilder::default()).unwrap(); + + conn + } + + #[test] + fn test_libsql_conn_builder_driver() { + test_driver(1000, |b| { + let conn = setup_test_conn(); + CoreConnection::run(conn, Program::seq(&["select * from test"]), b) + }) + } + + #[ignore = "the new implementation doesn't steal if nobody is trying to acquire a write lock"] + #[tokio::test] + async fn txn_timeout_no_stealing() { + let tmp = tempdir().unwrap(); + let make_conn = MakeLegacyConnection::new( + tmp.path().into(), + PassthroughWalWrapper, + Default::default(), + Default::default(), + MetaStoreHandle::load(tmp.path()).unwrap(), + Arc::new([]), + 100000000, + 100000000, + DEFAULT_AUTO_CHECKPOINT, + watch::channel(None).1, + None, + Default::default(), + Arc::new(|_| unreachable!()), + Arc::new(|| EitherWAL::A(Sqlite3WalManager::default())), + ) + .await + .unwrap(); + + tokio::time::pause(); + let conn = make_conn.make_connection().await.unwrap(); + let _builder = CoreConnection::run( + conn.inner.clone(), + Program::seq(&["BEGIN IMMEDIATE"]), + TestBuilder::default(), + ) + .unwrap(); + assert!(!conn.inner.lock().conn.is_autocommit()); + + tokio::time::sleep(Duration::from_secs(1)).await; + + let builder = 
CoreConnection::run( + conn.inner.clone(), + Program::seq(&["create table test (c)"]), + TestBuilder::default(), + ) + .unwrap(); + assert!(!conn.is_autocommit().await.unwrap()); + assert!(matches!(builder.into_ret()[0], Err(Error::LibSqlTxTimeout))); + } + + #[tokio::test] + /// A bunch of txn try to acquire the lock, and never release it. They will try to steal the + /// lock one after the other. All txn should eventually acquire the write lock + async fn serialized_txn_timeouts() { + let tmp = tempdir().unwrap(); + let make_conn = MakeLegacyConnection::new( + tmp.path().into(), + PassthroughWalWrapper, + Default::default(), + Default::default(), + MetaStoreHandle::load(tmp.path()).unwrap(), + Arc::new([]), + 100000000, + 100000000, + DEFAULT_AUTO_CHECKPOINT, + watch::channel(None).1, + None, + Default::default(), + Arc::new(|_| unreachable!()), + Arc::new(|| EitherWAL::A(Sqlite3WalManager::default())), + ) + .await + .unwrap(); + + let mut set = JoinSet::new(); + for _ in 0..10 { + let conn = make_conn.make_connection().await.unwrap(); + set.spawn_blocking(move || { + let builder = CoreConnection::run( + conn.inner.clone(), + Program::seq(&["BEGIN IMMEDIATE"]), + TestBuilder::default(), + ) + .unwrap(); + let ret = &builder.into_ret()[0]; + assert!( + (ret.is_ok() && !conn.inner.lock().conn.is_autocommit()) + || (matches!(ret, Err(Error::RusqliteErrorExtended(_, 5))) + && conn.inner.lock().conn.is_autocommit()) + ); + }); + } + + tokio::time::pause(); + + while let Some(ret) = set.join_next().await { + assert!(ret.is_ok()); + // advance time by a bit more than the txn timeout + tokio::time::advance(TXN_TIMEOUT + Duration::from_millis(100)).await; + } + } + + #[tokio::test] + /// verify that releasing a txn before the timeout + async fn release_before_timeout() { + let tmp = tempdir().unwrap(); + let make_conn = MakeLegacyConnection::new( + tmp.path().into(), + PassthroughWalWrapper, + Default::default(), + Default::default(), + 
MetaStoreHandle::load(tmp.path()).unwrap(), + Arc::new([]), + 100000000, + 100000000, + DEFAULT_AUTO_CHECKPOINT, + watch::channel(None).1, + None, + Default::default(), + Arc::new(|_| unreachable!()), + Arc::new(|| EitherWAL::A(Sqlite3WalManager::default())), + ) + .await + .unwrap(); + + let conn1 = make_conn.make_connection().await.unwrap(); + tokio::task::spawn_blocking({ + let conn = conn1.clone(); + move || { + let builder = CoreConnection::run( + conn.inner.clone(), + Program::seq(&["BEGIN IMMEDIATE"]), + TestBuilder::default(), + ) + .unwrap(); + assert!(!conn.inner.lock().is_autocommit()); + assert!(builder.into_ret()[0].is_ok()); + } + }) + .await + .unwrap(); + + let conn2 = make_conn.make_connection().await.unwrap(); + let handle = tokio::task::spawn_blocking({ + let conn = conn2.clone(); + move || { + let before = Instant::now(); + let builder = CoreConnection::run( + conn.inner.clone(), + Program::seq(&["BEGIN IMMEDIATE"]), + TestBuilder::default(), + ) + .unwrap(); + assert!(!conn.inner.lock().is_autocommit()); + assert!(builder.into_ret()[0].is_ok()); + before.elapsed() + } + }); + + let wait_time = TXN_TIMEOUT / 10; + tokio::time::sleep(wait_time).await; + + tokio::task::spawn_blocking({ + let conn = conn1.clone(); + move || { + let builder = CoreConnection::run( + conn.inner.clone(), + Program::seq(&["COMMIT"]), + TestBuilder::default(), + ) + .unwrap(); + assert!(conn.inner.lock().is_autocommit()); + assert!(builder.into_ret()[0].is_ok()); + } + }) + .await + .unwrap(); + + let elapsed = handle.await.unwrap(); + + let epsilon = Duration::from_millis(100); + assert!((wait_time..wait_time + epsilon).contains(&elapsed)); + } + + /// The goal of this test is to run many concurrent transaction and hopefully catch a bug in + /// the lock stealing code. If this test becomes flaky check out the lock stealing code. 
+ #[tokio::test] + async fn test_many_concurrent() { + let tmp = tempdir().unwrap(); + let make_conn = MakeLegacyConnection::new( + tmp.path().into(), + PassthroughWalWrapper, + Default::default(), + Default::default(), + MetaStoreHandle::load(tmp.path()).unwrap(), + Arc::new([]), + 100000000, + 100000000, + DEFAULT_AUTO_CHECKPOINT, + watch::channel(None).1, + None, + Default::default(), + Arc::new(|_| unreachable!()), + Arc::new(|| EitherWAL::A(Sqlite3WalManager::default())), + ) + .await + .unwrap(); + + let conn = make_conn.make_connection().await.unwrap(); + let (maker, manager) = metastore_connection_maker(None, tmp.path()).await.unwrap(); + let ctx = RequestContext::new( + Authenticated::FullAccess, + NamespaceName::default(), + MetaStore::new(Default::default(), tmp.path(), maker().unwrap(), manager) + .await + .unwrap(), + ); + conn.execute_program( + Program::seq(&["CREATE TABLE test (x)"]), + ctx.clone(), + TestBuilder::default(), + None, + ) + .await + .unwrap(); + let run_conn = |maker: Arc>| { + let ctx = ctx.clone(); + async move { + for _ in 0..1000 { + let conn = maker.make_connection().await.unwrap(); + let pgm = Program::seq(&["BEGIN IMMEDIATE", "INSERT INTO test VALUES (42)"]); + let res = conn + .execute_program(pgm, ctx.clone(), TestBuilder::default(), None) + .await + .unwrap() + .into_ret(); + for result in res { + result.unwrap(); + } + // with 99% change, commit the txn + if rand::thread_rng().gen_range(0..100) > 1 { + let pgm = Program::seq(&["INSERT INTO test VALUES (43)", "COMMIT"]); + let res = conn + .execute_program(pgm, ctx.clone(), TestBuilder::default(), None) + .await + .unwrap() + .into_ret(); + for result in res { + result.unwrap(); + } + } + } + } + }; + + let maker = Arc::new(make_conn); + let mut join_set = JoinSet::new(); + for _ in 0..3 { + join_set.spawn(run_conn(maker.clone())); + } + + let join_all = async move { + while let Some(next) = join_set.join_next().await { + next.unwrap(); + } + }; + + 
tokio::time::timeout(Duration::from_secs(60), join_all) + .await + .expect("timed out running connections"); + } + + #[tokio::test] + /// verify that releasing a txn before the timeout + async fn force_rollback_reset() { + let tmp = tempdir().unwrap(); + let make_conn = MakeLegacyConnection::new( + tmp.path().into(), + PassthroughWalWrapper, + Default::default(), + Default::default(), + MetaStoreHandle::load(tmp.path()).unwrap(), + Arc::new([]), + 100000000, + 100000000, + DEFAULT_AUTO_CHECKPOINT, + watch::channel(None).1, + None, + Default::default(), + Arc::new(|_| unreachable!()), + Arc::new(|| EitherWAL::A(Sqlite3WalManager::default())), + ) + .await + .unwrap(); + + let conn1 = make_conn.make_connection().await.unwrap(); + tokio::task::spawn_blocking({ + let conn = conn1.clone(); + move || { + let builder = CoreConnection::run( + conn.inner.clone(), + Program::seq(&["BEGIN IMMEDIATE"]), + TestBuilder::default(), + ) + .unwrap(); + assert!(!conn.inner.lock().is_autocommit()); + assert!(builder.into_ret()[0].is_ok()); + } + }) + .await + .unwrap(); + + let conn2 = make_conn.make_connection().await.unwrap(); + tokio::task::spawn_blocking({ + let conn = conn2.clone(); + move || { + let before = Instant::now(); + let builder = CoreConnection::run( + conn.inner.clone(), + Program::seq(&["BEGIN IMMEDIATE"]), + TestBuilder::default(), + ) + .unwrap(); + assert!(!conn.inner.lock().is_autocommit()); + assert!(builder.into_ret()[0].is_ok()); + before.elapsed() + } + }) + .await + .unwrap(); + + tokio::time::sleep(TXN_TIMEOUT * 2).await; + + tokio::task::spawn_blocking({ + let conn = conn1.clone(); + move || { + let builder = CoreConnection::run( + conn.inner.clone(), + Program::seq(&["SELECT 1;"]), + TestBuilder::default(), + ) + .unwrap(); + assert!(conn.inner.lock().is_autocommit()); + // timeout + assert!(builder.into_ret()[0].is_err()); + + let builder = CoreConnection::run( + conn.inner.clone(), + Program::seq(&["SELECT 1;"]), + TestBuilder::default(), + ) + 
.unwrap(); + assert!(conn.inner.lock().is_autocommit()); + // state reset + assert!(builder.into_ret()[0].is_ok()); + } + }) + .await + .unwrap(); + } +} diff --git a/libsql-server/src/connection/connection_manager.rs b/libsql-server/src/connection/connection_manager.rs index 8a95cd8e6e..b923f65ab6 100644 --- a/libsql-server/src/connection/connection_manager.rs +++ b/libsql-server/src/connection/connection_manager.rs @@ -15,27 +15,28 @@ use libsql_sys::wal::either::Either3; use libsql_sys::wal::wrapper::{WrapWal, WrappedWal}; use libsql_sys::wal::{CheckpointMode, Sqlite3Wal, Sqlite3WalManager, Wal}; use libsql_wal::io::StdIO; -use libsql_wal::storage::NoStorage; use libsql_wal::wal::{LibsqlWal, LibsqlWalManager}; use metrics::atomics::AtomicU64; use parking_lot::{Mutex, MutexGuard}; use rusqlite::ErrorCode; -use super::libsql::Connection; +use crate::SqldStorage; + +use super::connection_core::CoreConnection; use super::TXN_TIMEOUT; pub type ConnId = u64; #[cfg(feature = "durable-wal")] pub type InnerWalManager = - Either3, DurableWalManager>; + Either3, DurableWalManager>; #[cfg(feature = "durable-wal")] pub type InnerWal = Either3, DurableWal>; -#[cfg(not(feature = "durable-wal"))] -pub type InnerWalManager = Either>; #[cfg(not(feature = "durable-wal"))] +pub type InnerWalManager = Either>; +#[cfg(not(feature = "durable-wal"))] pub type InnerWal = Either>; pub type ManagedConnectionWal = WrappedWal; @@ -50,7 +51,7 @@ struct Slot { struct Abort(Arc); impl Abort { - fn from_conn(conn: &Arc>>) -> Self { + fn from_conn(conn: &Arc>>) -> Self { let conn = Arc::downgrade(conn); Self(Arc::new(move || { conn.upgrade() @@ -73,7 +74,7 @@ pub struct ConnectionManager { impl ConnectionManager { pub(super) fn register_connection( &self, - conn: &Arc>>, + conn: &Arc>>, id: ConnId, ) { let abort = Abort::from_conn(conn); diff --git a/libsql-server/src/connection/legacy.rs b/libsql-server/src/connection/legacy.rs new file mode 100644 index 0000000000..c8a240647e --- /dev/null +++ 
b/libsql-server/src/connection/legacy.rs @@ -0,0 +1,456 @@ +use std::ffi::c_int; +use std::path::{Path, PathBuf}; +use std::sync::atomic::AtomicBool; +use std::sync::Arc; + +use libsql_sys::wal::wrapper::{WrapWal, WrappedWal}; +use libsql_sys::wal::{BusyHandler, CheckpointCallback, Wal, WalManager}; +use libsql_sys::EncryptionConfig; +use parking_lot::Mutex; +use rusqlite::ffi::SQLITE_BUSY; +use rusqlite::{ErrorCode, OpenFlags}; +use tokio::sync::watch; +use tokio::time::Duration; + +use crate::error::Error; +use crate::metrics:: + DESCRIBE_COUNT +; +use crate::namespace::broadcasters::BroadcasterHandle; +use crate::namespace::meta_store::MetaStoreHandle; +use crate::namespace::ResolveNamespacePathFn; +use crate::query_result_builder::{QueryBuilderConfig, QueryResultBuilder}; +use crate::replication::FrameNo; +use crate::stats::Stats; +use crate::{record_time, Result}; + +use super::connection_core::CoreConnection; + +use super::connection_manager::{ + ConnectionManager, InnerWalManager, ManagedConnectionWal, ManagedConnectionWalWrapper, +}; +use super::program::{ + check_describe_auth, check_program_auth, DescribeResponse, +}; +use super::{MakeConnection, Program, RequestContext, TXN_TIMEOUT}; + +pub struct MakeLegacyConnection { + db_path: PathBuf, + wal_wrapper: W, + stats: Arc, + broadcaster: BroadcasterHandle, + config_store: MetaStoreHandle, + extensions: Arc<[PathBuf]>, + max_response_size: u64, + max_total_response_size: u64, + auto_checkpoint: u32, + current_frame_no_receiver: watch::Receiver>, + connection_manager: ConnectionManager, + /// return sqlite busy. 
To mitigate that, we hold on to one connection + _db: Option>, + encryption_config: Option, + block_writes: Arc, + resolve_attach_path: ResolveNamespacePathFn, + make_wal_manager: Arc InnerWalManager + Sync + Send + 'static>, +} + +impl MakeLegacyConnection +where + W: WrapWal + Send + 'static + Clone, +{ + #[allow(clippy::too_many_arguments)] + pub async fn new( + db_path: PathBuf, + wal_wrapper: W, + stats: Arc, + broadcaster: BroadcasterHandle, + config_store: MetaStoreHandle, + extensions: Arc<[PathBuf]>, + max_response_size: u64, + max_total_response_size: u64, + auto_checkpoint: u32, + current_frame_no_receiver: watch::Receiver>, + encryption_config: Option, + block_writes: Arc, + resolve_attach_path: ResolveNamespacePathFn, + make_wal_manager: Arc InnerWalManager + Sync + Send + 'static>, + ) -> Result { + let txn_timeout = config_store.get().txn_timeout.unwrap_or(TXN_TIMEOUT); + + let mut this = Self { + db_path, + stats, + broadcaster, + config_store, + extensions, + max_response_size, + max_total_response_size, + auto_checkpoint, + current_frame_no_receiver, + _db: None, + wal_wrapper, + encryption_config, + block_writes, + resolve_attach_path, + connection_manager: ConnectionManager::new(txn_timeout), + make_wal_manager, + }; + + let db = this.try_create_db().await?; + this._db = Some(db); + + Ok(this) + } + + /// Tries to create a database, retrying if the database is busy. + async fn try_create_db(&self) -> Result> { + // try 100 times to acquire initial db connection. + let mut retries = 0; + loop { + match self.make_connection().await { + Ok(conn) => return Ok(conn), + Err( + err @ Error::RusqliteError(rusqlite::Error::SqliteFailure( + rusqlite::ffi::Error { + code: ErrorCode::DatabaseBusy, + .. 
+ }, + _, + )), + ) => { + if retries < 100 { + tracing::warn!("Database file is busy, retrying..."); + retries += 1; + tokio::time::sleep(Duration::from_millis(100)).await + } else { + Err(err)?; + } + } + Err(e) => Err(e)?, + } + } + } + + #[tracing::instrument(skip(self))] + pub(super) async fn make_connection(&self) -> Result> { + LegacyConnection::new( + self.db_path.clone(), + self.extensions.clone(), + self.wal_wrapper.clone(), + self.stats.clone(), + self.broadcaster.clone(), + self.config_store.clone(), + QueryBuilderConfig { + max_size: Some(self.max_response_size), + max_total_size: Some(self.max_total_response_size), + auto_checkpoint: self.auto_checkpoint, + encryption_config: self.encryption_config.clone(), + }, + self.current_frame_no_receiver.clone(), + self.block_writes.clone(), + self.resolve_attach_path.clone(), + self.connection_manager.clone(), + self.make_wal_manager.clone(), + ) + .await + } +} + +#[async_trait::async_trait] +impl MakeConnection for MakeLegacyConnection +where + W: WrapWal + Send + Sync + 'static + Clone, +{ + type Connection = LegacyConnection; + + async fn create(&self) -> Result { + self.make_connection().await + } +} + +pub struct LegacyConnection { + pub(super) inner: Arc>>>, +} + +#[cfg(test)] +impl LegacyConnection { + pub async fn new_test(path: &Path) -> Self { + #[cfg(not(feature = "durable-wal"))] + use libsql_sys::wal::either::Either as EitherWAL; + #[cfg(feature = "durable-wal")] + use libsql_sys::wal::either::Either3 as EitherWAL; + use libsql_sys::wal::Sqlite3WalManager; + + Self::new( + path.to_owned(), + Arc::new([]), + libsql_sys::wal::wrapper::PassthroughWalWrapper, + Default::default(), + Default::default(), + MetaStoreHandle::new_test(), + QueryBuilderConfig::default(), + tokio::sync::watch::channel(None).1, + Default::default(), + Arc::new(|_| unreachable!()), + ConnectionManager::new(TXN_TIMEOUT), + Arc::new(|| EitherWAL::A(Sqlite3WalManager::default())), + ) + .await + .unwrap() + } +} + +impl Clone 
for LegacyConnection { + fn clone(&self) -> Self { + Self { + inner: self.inner.clone(), + } + } +} + +#[derive(Clone, Copy)] +pub struct InhibitCheckpointWalWrapper { + close_only: bool, +} + +impl InhibitCheckpointWalWrapper { + pub fn new(close_only: bool) -> Self { + Self { close_only } + } +} + +impl WrapWal for InhibitCheckpointWalWrapper { + fn checkpoint( + &mut self, + wrapped: &mut W, + db: &mut libsql_sys::wal::Sqlite3Db, + mode: libsql_sys::wal::CheckpointMode, + busy_handler: Option<&mut dyn BusyHandler>, + sync_flags: u32, + buf: &mut [u8], + checkpoint_cb: Option<&mut dyn CheckpointCallback>, + in_wal: Option<&mut i32>, + backfilled: Option<&mut i32>, + ) -> libsql_sys::wal::Result<()> { + if !self.close_only { + wrapped.checkpoint( + db, + mode, + busy_handler, + sync_flags, + buf, + checkpoint_cb, + in_wal, + backfilled, + ) + } else { + tracing::warn!( + "checkpoint inhibited: this connection is not allowed to perform checkpoints" + ); + Err(rusqlite::ffi::Error::new(SQLITE_BUSY)) + } + } + + fn close>( + &mut self, + manager: &M, + wrapped: &mut W, + db: &mut libsql_sys::wal::Sqlite3Db, + sync_flags: c_int, + _scratch: Option<&mut [u8]>, + ) -> libsql_sys::wal::Result<()> { + // sqlite3 wall will not checkpoint if it's not provided with a scratch buffer. We take + // advantage of that to prevent checpoint on such connections. + manager.close(wrapped, db, sync_flags, None) + } +} + +pub type InhibitCheckpoint = WrappedWal; + +// Opens a connection with checkpoint inhibited +pub fn open_conn( + path: &Path, + wal_manager: T, + flags: Option, + encryption_config: Option, +) -> Result>, rusqlite::Error> +where + T: WalManager, +{ + open_conn_active_checkpoint( + path, + wal_manager.wrap(InhibitCheckpointWalWrapper::new(false)), + flags, + u32::MAX, + encryption_config, + ) +} + +/// Same as open_conn, but with checkpointing activated. 
+pub fn open_conn_active_checkpoint( + path: &Path, + wal_manager: T, + flags: Option, + auto_checkpoint: u32, + encryption_config: Option, +) -> Result, rusqlite::Error> +where + T: WalManager, +{ + let flags = flags.unwrap_or( + OpenFlags::SQLITE_OPEN_READ_WRITE + | OpenFlags::SQLITE_OPEN_CREATE + | OpenFlags::SQLITE_OPEN_URI + | OpenFlags::SQLITE_OPEN_NO_MUTEX, + ); + + libsql_sys::Connection::open( + path.join("data"), + flags, + wal_manager, + auto_checkpoint, + encryption_config, + ) +} + +impl LegacyConnection +where + W: WrapWal + Send + Clone + 'static, +{ + pub async fn new( + path: impl AsRef + Send + 'static, + extensions: Arc<[PathBuf]>, + wal_wrapper: W, + stats: Arc, + broadcaster: BroadcasterHandle, + config_store: MetaStoreHandle, + builder_config: QueryBuilderConfig, + current_frame_no_receiver: watch::Receiver>, + block_writes: Arc, + resolve_attach_path: ResolveNamespacePathFn, + connection_manager: ConnectionManager, + make_wal: Arc InnerWalManager + Sync + Send + 'static>, + ) -> crate::Result { + let (conn, id) = tokio::task::spawn_blocking({ + let connection_manager = connection_manager.clone(); + move || -> crate::Result<_> { + let manager = ManagedConnectionWalWrapper::new(connection_manager); + let id = manager.id(); + let wal = make_wal().wrap(manager).wrap(wal_wrapper); + + let conn = CoreConnection::new( + path.as_ref(), + extensions, + wal, + stats, + broadcaster, + config_store, + builder_config, + current_frame_no_receiver, + block_writes, + resolve_attach_path, + )?; + + let namespace = path + .as_ref() + .file_name() + .unwrap_or_default() + .to_os_string() + .into_string() + .unwrap_or_default(); + conn.raw().create_scalar_function( + "libsql_server_database_name", + 0, + rusqlite::functions::FunctionFlags::SQLITE_UTF8 + | rusqlite::functions::FunctionFlags::SQLITE_DETERMINISTIC, + move |_| Ok(namespace.clone()), + )?; + Ok((conn, id)) + } + }) + .await + .unwrap()?; + + let inner = Arc::new(Mutex::new(conn)); + + 
connection_manager.register_connection(&inner, id); + + Ok(Self { inner }) + } + + pub async fn execute( + &self, + pgm: Program, + ctx: RequestContext, + builder: B, + ) -> Result { + check_program_auth(&ctx, &pgm, &self.inner.lock().config())?; + let conn = self.inner.clone(); + CoreConnection::run_async(conn, pgm, builder).await + } +} + +#[async_trait::async_trait] +impl super::Connection for LegacyConnection +where + W: WrapWal + Clone + Send + 'static, +{ + async fn execute_program( + &self, + pgm: Program, + ctx: RequestContext, + builder: B, + _replication_index: Option, + ) -> Result { + record_time! { + "libsql_query_exec"; + self.execute(pgm, ctx, builder).await + } + } + + async fn describe( + &self, + sql: String, + ctx: RequestContext, + _replication_index: Option, + ) -> Result> { + DESCRIBE_COUNT.increment(1); + check_describe_auth(ctx)?; + let conn = self.inner.clone(); + let res = tokio::task::spawn_blocking(move || conn.lock().describe(&sql)) + .await + .unwrap(); + + Ok(res) + } + + async fn is_autocommit(&self) -> Result { + Ok(self.inner.lock().is_autocommit()) + } + + async fn checkpoint(&self) -> Result<()> { + let conn = self.inner.clone(); + tokio::task::spawn_blocking(move || conn.lock().checkpoint()) + .await + .unwrap()?; + Ok(()) + } + + async fn vacuum_if_needed(&self) -> Result<()> { + let conn = self.inner.clone(); + tokio::task::spawn_blocking(move || conn.lock().vacuum_if_needed()) + .await + .unwrap()?; + Ok(()) + } + + fn diagnostics(&self) -> String { + String::new() + } + + fn with_raw(&self, f: impl FnOnce(&mut rusqlite::Connection) -> R) -> R { + let mut inner = self.inner.lock(); + f(inner.raw_mut()) + } +} + diff --git a/libsql-server/src/connection/libsql.rs b/libsql-server/src/connection/libsql.rs index 9896164e55..1f31e5be5b 100644 --- a/libsql-server/src/connection/libsql.rs +++ b/libsql-server/src/connection/libsql.rs @@ -1,759 +1,101 @@ -use std::ffi::{c_int, c_void}; -use std::ops::Deref; use std::path::{Path, 
PathBuf}; -use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::atomic::AtomicBool; use std::sync::Arc; -use libsql_sys::wal::wrapper::{WrapWal, WrappedWal}; -use libsql_sys::wal::{BusyHandler, CheckpointCallback, Wal, WalManager}; use libsql_sys::EncryptionConfig; -use metrics::histogram; +use libsql_wal::wal::{LibsqlWal, LibsqlWalManager}; +use libsql_wal::io::StdIO; use parking_lot::Mutex; -use rusqlite::ffi::SQLITE_BUSY; -use rusqlite::{ErrorCode, OpenFlags}; use tokio::sync::watch; -use tokio::time::{Duration, Instant}; -use crate::error::Error; -use crate::metrics::{ - DESCRIBE_COUNT, PROGRAM_EXEC_COUNT, QUERY_CANCELED, VACUUM_COUNT, WAL_CHECKPOINT_COUNT, -}; +use crate::connection::program::check_program_auth; +use crate::metrics::DESCRIBE_COUNT; use crate::namespace::broadcasters::BroadcasterHandle; use crate::namespace::meta_store::MetaStoreHandle; use crate::namespace::ResolveNamespacePathFn; -use crate::query_analysis::StmtKind; use crate::query_result_builder::{QueryBuilderConfig, QueryResultBuilder}; +use crate::{record_time, SqldStorage, BLOCKING_RT}; use crate::replication::FrameNo; -use crate::stats::{Stats, StatsUpdateMessage}; -use crate::{record_time, Result, BLOCKING_RT}; +use crate::stats::Stats; +use crate::Result; -use super::connection_manager::{ - ConnectionManager, InnerWalManager, ManagedConnectionWal, ManagedConnectionWalWrapper, -}; -use super::program::{ - check_describe_auth, check_program_auth, DescribeCol, DescribeParam, DescribeResponse, Vm, -}; -use super::{MakeConnection, Program, RequestContext, TXN_TIMEOUT}; +use super::connection_core::CoreConnection; +use super::program::{check_describe_auth, DescribeResponse, Program}; +use super::{MakeConnection, RequestContext}; -pub struct MakeLibSqlConn { - db_path: PathBuf, - wal_wrapper: W, - stats: Arc, - broadcaster: BroadcasterHandle, - config_store: MetaStoreHandle, - extensions: Arc<[PathBuf]>, - max_response_size: u64, - max_total_response_size: u64, - auto_checkpoint: 
u32, - current_frame_no_receiver: watch::Receiver>, - connection_manager: ConnectionManager, - /// return sqlite busy. To mitigate that, we hold on to one connection - _db: Option>, - encryption_config: Option, - block_writes: Arc, - resolve_attach_path: ResolveNamespacePathFn, - make_wal_manager: Arc InnerWalManager + Sync + Send + 'static>, +pub struct MakeLibsqlConnection { + pub(crate) inner: Arc, } -impl MakeLibSqlConn -where - W: WrapWal + Send + 'static + Clone, -{ - #[allow(clippy::too_many_arguments)] - pub async fn new( - db_path: PathBuf, - wal_wrapper: W, - stats: Arc, - broadcaster: BroadcasterHandle, - config_store: MetaStoreHandle, - extensions: Arc<[PathBuf]>, - max_response_size: u64, - max_total_response_size: u64, - auto_checkpoint: u32, - current_frame_no_receiver: watch::Receiver>, - encryption_config: Option, - block_writes: Arc, - resolve_attach_path: ResolveNamespacePathFn, - make_wal_manager: Arc InnerWalManager + Sync + Send + 'static>, - ) -> Result { - let txn_timeout = config_store.get().txn_timeout.unwrap_or(TXN_TIMEOUT); - - let mut this = Self { - db_path, - stats, - broadcaster, - config_store, - extensions, - max_response_size, - max_total_response_size, - auto_checkpoint, - current_frame_no_receiver, - _db: None, - wal_wrapper, - encryption_config, - block_writes, - resolve_attach_path, - connection_manager: ConnectionManager::new(txn_timeout), - make_wal_manager, - }; - - let db = this.try_create_db().await?; - this._db = Some(db); - - Ok(this) - } - - /// Tries to create a database, retrying if the database is busy. - async fn try_create_db(&self) -> Result> { - // try 100 times to acquire initial db connection. - let mut retries = 0; - loop { - match self.make_connection().await { - Ok(conn) => return Ok(conn), - Err( - err @ Error::RusqliteError(rusqlite::Error::SqliteFailure( - rusqlite::ffi::Error { - code: ErrorCode::DatabaseBusy, - .. 
- }, - _, - )), - ) => { - if retries < 100 { - tracing::warn!("Database file is busy, retrying..."); - retries += 1; - tokio::time::sleep(Duration::from_millis(100)).await - } else { - Err(err)?; - } - } - Err(e) => Err(e)?, - } - } - } - - #[tracing::instrument(skip(self))] - async fn make_connection(&self) -> Result> { - LibSqlConnection::new( - self.db_path.clone(), - self.extensions.clone(), - self.wal_wrapper.clone(), - self.stats.clone(), - self.broadcaster.clone(), - self.config_store.clone(), - QueryBuilderConfig { - max_size: Some(self.max_response_size), - max_total_size: Some(self.max_total_response_size), - auto_checkpoint: self.auto_checkpoint, - encryption_config: self.encryption_config.clone(), - }, - self.current_frame_no_receiver.clone(), - self.block_writes.clone(), - self.resolve_attach_path.clone(), - self.connection_manager.clone(), - self.make_wal_manager.clone(), - ) - .await - } +pub struct MakeLibsqlConnectionInner { + pub(crate) db_path: Arc, + pub(crate) stats: Arc, + pub(crate) broadcaster: BroadcasterHandle, + pub(crate) config_store: MetaStoreHandle, + pub(crate) extensions: Arc<[PathBuf]>, + pub(crate) max_response_size: u64, + pub(crate) max_total_response_size: u64, + pub(crate) auto_checkpoint: u32, + pub(crate) current_frame_no_receiver: watch::Receiver>, + pub(crate) encryption_config: Option, + pub(crate) block_writes: Arc, + pub(crate) resolve_attach_path: ResolveNamespacePathFn, + pub(crate) wal_manager: LibsqlWalManager, } #[async_trait::async_trait] -impl MakeConnection for MakeLibSqlConn -where - W: WrapWal + Send + Sync + 'static + Clone, -{ - type Connection = LibSqlConnection; - - async fn create(&self) -> Result { - self.make_connection().await - } -} - -pub struct LibSqlConnection { - inner: Arc>>>, -} - -#[cfg(test)] -impl LibSqlConnection { - pub async fn new_test(path: &Path) -> Self { - #[cfg(not(feature = "durable-wal"))] - use libsql_sys::wal::either::Either as EitherWAL; - #[cfg(feature = "durable-wal")] - use 
libsql_sys::wal::either::Either3 as EitherWAL; - use libsql_sys::wal::Sqlite3WalManager; - - Self::new( - path.to_owned(), - Arc::new([]), - libsql_sys::wal::wrapper::PassthroughWalWrapper, - Default::default(), - Default::default(), - MetaStoreHandle::new_test(), - QueryBuilderConfig::default(), - tokio::sync::watch::channel(None).1, - Default::default(), - Arc::new(|_| unreachable!()), - ConnectionManager::new(TXN_TIMEOUT), - Arc::new(|| EitherWAL::A(Sqlite3WalManager::default())), - ) - .await - .unwrap() - } -} - -impl Clone for LibSqlConnection { - fn clone(&self) -> Self { - Self { - inner: self.inner.clone(), - } - } -} - -#[derive(Clone, Copy)] -pub struct InhibitCheckpointWalWrapper { - close_only: bool, -} - -impl InhibitCheckpointWalWrapper { - pub fn new(close_only: bool) -> Self { - Self { close_only } - } -} +impl MakeConnection for MakeLibsqlConnection { + type Connection = LibsqlConnection; + + async fn create(&self) -> crate::Result { + let inner = self.inner.clone(); + let core = BLOCKING_RT.spawn_blocking(move || -> crate::Result<_> { + let builder_config = QueryBuilderConfig { + max_size: Some(inner.max_response_size), + max_total_size: Some(inner.max_total_response_size), + auto_checkpoint: inner.auto_checkpoint, + encryption_config: inner.encryption_config.clone(), + }; -impl WrapWal for InhibitCheckpointWalWrapper { - fn checkpoint( - &mut self, - wrapped: &mut W, - db: &mut libsql_sys::wal::Sqlite3Db, - mode: libsql_sys::wal::CheckpointMode, - busy_handler: Option<&mut dyn BusyHandler>, - sync_flags: u32, - buf: &mut [u8], - checkpoint_cb: Option<&mut dyn CheckpointCallback>, - in_wal: Option<&mut i32>, - backfilled: Option<&mut i32>, - ) -> libsql_sys::wal::Result<()> { - if !self.close_only { - wrapped.checkpoint( - db, - mode, - busy_handler, - sync_flags, - buf, - checkpoint_cb, - in_wal, - backfilled, + // todo: handle retries + CoreConnection::new( + &inner.db_path, + inner.extensions.clone(), + inner.wal_manager.clone(), + 
inner.stats.clone(), + inner.broadcaster.clone(), + inner.config_store.clone(), + builder_config, + inner.current_frame_no_receiver.clone(), + inner.block_writes.clone(), + inner.resolve_attach_path.clone() ) - } else { - tracing::warn!( - "checkpoint inhibited: this connection is not allowed to perform checkpoints" - ); - Err(rusqlite::ffi::Error::new(SQLITE_BUSY)) - } - } + }).await.unwrap()?; - fn close>( - &mut self, - manager: &M, - wrapped: &mut W, - db: &mut libsql_sys::wal::Sqlite3Db, - sync_flags: c_int, - _scratch: Option<&mut [u8]>, - ) -> libsql_sys::wal::Result<()> { - // sqlite3 wall will not checkpoint if it's not provided with a scratch buffer. We take - // advantage of that to prevent checpoint on such connections. - manager.close(wrapped, db, sync_flags, None) + Ok(LibsqlConnection { inner: Arc::new(Mutex::new(core)) }) } } -pub type InhibitCheckpoint = WrappedWal; - -// Opens a connection with checkpoint inhibited -pub fn open_conn( - path: &Path, - wal_manager: T, - flags: Option, - encryption_config: Option, -) -> Result>, rusqlite::Error> -where - T: WalManager, -{ - open_conn_active_checkpoint( - path, - wal_manager.wrap(InhibitCheckpointWalWrapper::new(false)), - flags, - u32::MAX, - encryption_config, - ) +#[derive(Clone)] +pub struct LibsqlConnection { + inner: Arc>>>, } -/// Same as open_conn, but with checkpointing activated. 
-pub fn open_conn_active_checkpoint( - path: &Path, - wal_manager: T, - flags: Option, - auto_checkpoint: u32, - encryption_config: Option, -) -> Result, rusqlite::Error> -where - T: WalManager, -{ - let flags = flags.unwrap_or( - OpenFlags::SQLITE_OPEN_READ_WRITE - | OpenFlags::SQLITE_OPEN_CREATE - | OpenFlags::SQLITE_OPEN_URI - | OpenFlags::SQLITE_OPEN_NO_MUTEX, - ); - - libsql_sys::Connection::open( - path.join("data"), - flags, - wal_manager, - auto_checkpoint, - encryption_config, - ) -} - -impl LibSqlConnection -where - W: WrapWal + Send + Clone + 'static, -{ - pub async fn new( - path: impl AsRef + Send + 'static, - extensions: Arc<[PathBuf]>, - wal_wrapper: W, - stats: Arc, - broadcaster: BroadcasterHandle, - config_store: MetaStoreHandle, - builder_config: QueryBuilderConfig, - current_frame_no_receiver: watch::Receiver>, - block_writes: Arc, - resolve_attach_path: ResolveNamespacePathFn, - connection_manager: ConnectionManager, - make_wal: Arc InnerWalManager + Sync + Send + 'static>, - ) -> crate::Result { - let (conn, id) = tokio::task::spawn_blocking({ - let connection_manager = connection_manager.clone(); - move || -> crate::Result<_> { - let manager = ManagedConnectionWalWrapper::new(connection_manager); - let id = manager.id(); - let wal = make_wal().wrap(manager).wrap(wal_wrapper); - - let conn = Connection::new( - path.as_ref(), - extensions, - wal, - stats, - broadcaster, - config_store, - builder_config, - current_frame_no_receiver, - block_writes, - resolve_attach_path, - )?; - - let namespace = path - .as_ref() - .file_name() - .unwrap_or_default() - .to_os_string() - .into_string() - .unwrap_or_default(); - conn.conn.create_scalar_function( - "libsql_server_database_name", - 0, - rusqlite::functions::FunctionFlags::SQLITE_UTF8 - | rusqlite::functions::FunctionFlags::SQLITE_DETERMINISTIC, - move |_| Ok(namespace.clone()), - )?; - Ok((conn, id)) - } - }) - .await - .unwrap()?; - - let inner = Arc::new(Mutex::new(conn)); - - 
connection_manager.register_connection(&inner, id); - - Ok(Self { inner }) - } - - pub fn with_raw(&self, f: F) -> R - where - F: FnOnce(&mut rusqlite::Connection) -> R, - { - let mut inner = self.inner.lock(); - f(&mut inner.conn) - } - +impl LibsqlConnection { pub async fn execute( &self, pgm: Program, ctx: RequestContext, builder: B, - ) -> Result<(B, Program)> { - struct Bomb { - canceled: Arc, - defused: bool, - } - - impl Drop for Bomb { - fn drop(&mut self) { - if !self.defused { - tracing::trace!("cancelling request"); - self.canceled.store(true, Ordering::Relaxed); - } - } - } - - let canceled = { - let cancelled = self.inner.lock().canceled.clone(); - cancelled.store(false, Ordering::Relaxed); - cancelled - }; - - PROGRAM_EXEC_COUNT.increment(1); - - check_program_auth(&ctx, &pgm, &self.inner.lock().config_store.get())?; - - // create the bomb right before spawning the blocking task. - let mut bomb = Bomb { - canceled, - defused: false, - }; + ) -> Result { + check_program_auth(&ctx, &pgm, &self.inner.lock().config())?; let conn = self.inner.clone(); - let ret = BLOCKING_RT - .spawn_blocking(move || Connection::run(conn, pgm, builder)) - .await - .unwrap(); - - bomb.defused = true; - - ret - } -} - -pub(super) struct Connection { - conn: libsql_sys::Connection, - stats: Arc, - config_store: MetaStoreHandle, - builder_config: QueryBuilderConfig, - current_frame_no_receiver: watch::Receiver>, - block_writes: Arc, - resolve_attach_path: ResolveNamespacePathFn, - forced_rollback: bool, - broadcaster: BroadcasterHandle, - hooked: bool, - canceled: Arc, -} - -fn update_stats( - stats: &Stats, - sql: String, - rows_read: u64, - rows_written: u64, - mem_used: u64, - elapsed: Duration, -) { - stats.send(StatsUpdateMessage { - sql, - elapsed, - rows_read, - rows_written, - mem_used, - }); -} - -impl Connection { - fn new>( - path: &Path, - extensions: Arc<[PathBuf]>, - wal_manager: T, - stats: Arc, - broadcaster: BroadcasterHandle, - config_store: MetaStoreHandle, 
- builder_config: QueryBuilderConfig, - current_frame_no_receiver: watch::Receiver>, - block_writes: Arc, - resolve_attach_path: ResolveNamespacePathFn, - ) -> Result { - let conn = open_conn_active_checkpoint( - path, - wal_manager, - None, - builder_config.auto_checkpoint, - builder_config.encryption_config.clone(), - )?; - - let config = config_store.get(); - conn.pragma_update(None, "max_page_count", config.max_db_pages)?; - tracing::debug!("setting PRAGMA synchronous to {}", config.durability_mode); - conn.pragma_update(None, "synchronous", config.durability_mode)?; - - conn.set_limit( - rusqlite::limits::Limit::SQLITE_LIMIT_LENGTH, - config.max_row_size as i32, - ); - - unsafe { - const MAX_RETRIES: c_int = 8; - extern "C" fn do_nothing(_: *mut c_void, n: c_int) -> c_int { - (n < MAX_RETRIES) as _ - } - libsql_sys::ffi::sqlite3_busy_handler( - conn.handle(), - Some(do_nothing), - std::ptr::null_mut(), - ); - } - - let canceled = Arc::new(AtomicBool::new(false)); - - conn.progress_handler(100, { - let canceled = canceled.clone(); - Some(move || { - let canceled = canceled.load(Ordering::Relaxed); - if canceled { - QUERY_CANCELED.increment(1); - tracing::trace!("request canceled"); - } - canceled - }) - }); - - let this = Self { - conn, - stats, - config_store, - builder_config, - current_frame_no_receiver, - block_writes, - resolve_attach_path, - forced_rollback: false, - broadcaster, - hooked: false, - canceled, - }; - - for ext in extensions.iter() { - unsafe { - let _guard = rusqlite::LoadExtensionGuard::new(&this.conn).unwrap(); - if let Err(e) = this.conn.load_extension(ext, None) { - tracing::error!("failed to load extension: {}", ext.display()); - Err(e)?; - } - tracing::trace!("Loaded extension {}", ext.display()); - } - } - - Ok(this) - } - - fn run( - this: Arc>, - pgm: Program, - mut builder: B, - ) -> Result<(B, Program)> { - let (config, stats, block_writes, resolve_attach_path) = { - let mut lock = this.lock(); - let config = 
lock.config_store.get(); - let stats = lock.stats.clone(); - let block_writes = lock.block_writes.clone(); - let resolve_attach_path = lock.resolve_attach_path.clone(); - - lock.update_hooks(); - - (config, stats, block_writes, resolve_attach_path) - }; - - builder.init(&this.lock().builder_config)?; - let mut vm = Vm::new( - builder, - &pgm, - move |stmt_kind| { - let should_block = match stmt_kind { - StmtKind::Read | StmtKind::TxnBegin => config.block_reads, - StmtKind::Write => { - config.block_reads - || config.block_writes - || block_writes.load(Ordering::SeqCst) - } - StmtKind::DDL => config.block_reads || config.block_writes, - StmtKind::TxnEnd - | StmtKind::Release - | StmtKind::Savepoint - | StmtKind::Detach - | StmtKind::Attach(_) => false, - }; - - ( - should_block, - should_block.then(|| config.block_reason.clone()).flatten(), - ) - }, - move |sql, rows_read, rows_written, mem_used, elapsed| { - update_stats(&stats, sql, rows_read, rows_written, mem_used, elapsed) - }, - resolve_attach_path, - ); - - let mut has_timeout = false; - while !vm.finished() { - let mut conn = this.lock(); - - if conn.forced_rollback { - has_timeout = true; - conn.forced_rollback = false; - } - - // once there was a timeout, invalidate all the program steps - if has_timeout { - vm.builder().begin_step()?; - vm.builder().step_error(Error::LibSqlTxTimeout)?; - vm.builder().finish_step(0, None)?; - vm.advance(); - continue; - } - - let conn = conn.conn.deref(); - vm.step(conn)?; - } - - { - let mut lock = this.lock(); - let is_autocommit = lock.conn.is_autocommit(); - let current_fno = *lock.current_frame_no_receiver.borrow_and_update(); - vm.builder().finish(current_fno, is_autocommit)?; - } - - Ok((vm.into_builder(), pgm)) - } - - fn rollback(&self) { - if let Err(e) = self.conn.execute("ROLLBACK", ()) { - tracing::error!("failed to rollback: {e}"); - } - } - - pub(super) fn force_rollback(&mut self) { - if !self.forced_rollback { - self.rollback(); - self.forced_rollback = 
true; - } - } - - fn checkpoint(&self) -> Result<()> { - let start = Instant::now(); - self.conn - .query_row("PRAGMA wal_checkpoint(TRUNCATE)", (), |row| { - let status: i32 = row.get(0)?; - let wal_frames: i32 = row.get(1)?; - let moved_frames: i32 = row.get(2)?; - tracing::info!( - "WAL checkpoint successful, status: {}, WAL frames: {}, moved frames: {}", - status, - wal_frames, - moved_frames - ); - Ok(()) - })?; - WAL_CHECKPOINT_COUNT.increment(1); - histogram!("libsql_server_wal_checkpoint_time", start.elapsed()); - Ok(()) - } - - fn vacuum_if_needed(&self) -> Result<()> { - let page_count = self - .conn - .query_row("PRAGMA page_count", (), |row| row.get::<_, i64>(0))?; - let freelist_count = self - .conn - .query_row("PRAGMA freelist_count", (), |row| row.get::<_, i64>(0))?; - // NOTICE: don't bother vacuuming if we don't have at least 256MiB of data - if page_count >= 65536 && freelist_count * 2 > page_count { - tracing::info!("Vacuuming: pages={page_count} freelist={freelist_count}"); - self.conn.execute("VACUUM", ())?; - } else { - tracing::trace!("Not vacuuming: pages={page_count} freelist={freelist_count}"); - } - VACUUM_COUNT.increment(1); - Ok(()) - } - - fn describe(&self, sql: &str) -> crate::Result { - let stmt = self.conn.prepare(sql)?; - - let params = (1..=stmt.parameter_count()) - .map(|param_i| { - let name = stmt.parameter_name(param_i).map(|n| n.into()); - DescribeParam { name } - }) - .collect(); - - let cols = stmt - .columns() - .into_iter() - .map(|col| { - let name = col.name().into(); - let decltype = col.decl_type().map(|t| t.into()); - DescribeCol { name, decltype } - }) - .collect(); - - let is_explain = stmt.is_explain() != 0; - let is_readonly = stmt.readonly(); - Ok(DescribeResponse { - params, - cols, - is_explain, - is_readonly, - }) - } - - fn is_autocommit(&self) -> bool { - self.conn.is_autocommit() - } - - fn update_hooks(&mut self) { - let (update_fn, commit_fn, rollback_fn) = if self.hooked { - if 
self.broadcaster.active() { - return; - } - self.hooked = false; - (None, None, None) - } else { - let Some(broadcaster) = self.broadcaster.get() else { - return; - }; - - let update = broadcaster.clone(); - let update_fn = Some(move |action: _, _: &_, table: &_, _| { - update.notify(table, action); - }); - - let commit = broadcaster.clone(); - let commit_fn = Some(move || { - commit.commit(); - false // allow commit to go through - }); - - let rollback = broadcaster; - let rollback_fn = Some(move || rollback.rollback()); - (update_fn, commit_fn, rollback_fn) - }; - - self.conn.update_hook(update_fn); - self.conn.commit_hook(commit_fn); - self.conn.rollback_hook(rollback_fn); + CoreConnection::run_async(conn, pgm, builder).await } } #[async_trait::async_trait] -impl super::Connection for LibSqlConnection -where - W: WrapWal + Clone + Send + 'static, -{ +impl super::Connection for LibsqlConnection { async fn execute_program( &self, pgm: Program, @@ -763,7 +105,7 @@ where ) -> Result { record_time! 
{ "libsql_query_exec"; - self.execute(pgm, ctx, builder).await.map(|(b, _)| b) + self.execute(pgm, ctx, builder).await } } @@ -806,427 +148,9 @@ where fn diagnostics(&self) -> String { String::new() } -} - -#[cfg(test)] -mod test { - use itertools::Itertools; - #[cfg(not(feature = "durable-wal"))] - use libsql_sys::wal::either::Either as EitherWAL; - #[cfg(feature = "durable-wal")] - use libsql_sys::wal::either::Either3 as EitherWAL; - use libsql_sys::wal::wrapper::PassthroughWalWrapper; - use libsql_sys::wal::{Sqlite3Wal, Sqlite3WalManager}; - use rand::Rng; - use tempfile::tempdir; - use tokio::task::JoinSet; - - use crate::auth::Authenticated; - use crate::connection::{Connection as _, TXN_TIMEOUT}; - use crate::namespace::meta_store::{metastore_connection_maker, MetaStore}; - use crate::namespace::NamespaceName; - use crate::query_result_builder::test::{test_driver, TestBuilder}; - use crate::query_result_builder::QueryResultBuilder; - use crate::DEFAULT_AUTO_CHECKPOINT; - - use super::*; - - fn setup_test_conn() -> Arc>> { - let conn = Connection { - conn: libsql_sys::Connection::test(), - stats: Arc::new(Stats::default()), - config_store: MetaStoreHandle::new_test(), - builder_config: QueryBuilderConfig::default(), - current_frame_no_receiver: watch::channel(None).1, - block_writes: Default::default(), - resolve_attach_path: Arc::new(|_| unreachable!()), - forced_rollback: false, - broadcaster: Default::default(), - hooked: false, - canceled: Arc::new(false.into()), - }; - - let conn = Arc::new(Mutex::new(conn)); - - let stmts = std::iter::once("create table test (x)") - .chain(std::iter::repeat("insert into test values ('hello world')").take(100)) - .collect_vec(); - Connection::run(conn.clone(), Program::seq(&stmts), TestBuilder::default()).unwrap(); - - conn - } - - #[test] - fn test_libsql_conn_builder_driver() { - test_driver(1000, |b| { - let conn = setup_test_conn(); - Connection::run(conn, Program::seq(&["select * from test"]), b) - }) - } - - 
#[ignore = "the new implementation doesn't steal if nobody is trying to acquire a write lock"] - #[tokio::test] - async fn txn_timeout_no_stealing() { - let tmp = tempdir().unwrap(); - let make_conn = MakeLibSqlConn::new( - tmp.path().into(), - PassthroughWalWrapper, - Default::default(), - Default::default(), - MetaStoreHandle::load(tmp.path()).unwrap(), - Arc::new([]), - 100000000, - 100000000, - DEFAULT_AUTO_CHECKPOINT, - watch::channel(None).1, - None, - Default::default(), - Arc::new(|_| unreachable!()), - Arc::new(|| EitherWAL::A(Sqlite3WalManager::default())), - ) - .await - .unwrap(); - - tokio::time::pause(); - let conn = make_conn.make_connection().await.unwrap(); - let _builder = Connection::run( - conn.inner.clone(), - Program::seq(&["BEGIN IMMEDIATE"]), - TestBuilder::default(), - ) - .unwrap() - .0; - assert!(!conn.inner.lock().conn.is_autocommit()); - - tokio::time::sleep(Duration::from_secs(1)).await; - - let builder = Connection::run( - conn.inner.clone(), - Program::seq(&["create table test (c)"]), - TestBuilder::default(), - ) - .unwrap() - .0; - assert!(!conn.is_autocommit().await.unwrap()); - assert!(matches!(builder.into_ret()[0], Err(Error::LibSqlTxTimeout))); - } - - #[tokio::test] - /// A bunch of txn try to acquire the lock, and never release it. They will try to steal the - /// lock one after the other. 
All txn should eventually acquire the write lock - async fn serialized_txn_timeouts() { - let tmp = tempdir().unwrap(); - let make_conn = MakeLibSqlConn::new( - tmp.path().into(), - PassthroughWalWrapper, - Default::default(), - Default::default(), - MetaStoreHandle::load(tmp.path()).unwrap(), - Arc::new([]), - 100000000, - 100000000, - DEFAULT_AUTO_CHECKPOINT, - watch::channel(None).1, - None, - Default::default(), - Arc::new(|_| unreachable!()), - Arc::new(|| EitherWAL::A(Sqlite3WalManager::default())), - ) - .await - .unwrap(); - - let mut set = JoinSet::new(); - for _ in 0..10 { - let conn = make_conn.make_connection().await.unwrap(); - set.spawn_blocking(move || { - let builder = Connection::run( - conn.inner.clone(), - Program::seq(&["BEGIN IMMEDIATE"]), - TestBuilder::default(), - ) - .unwrap() - .0; - let ret = &builder.into_ret()[0]; - assert!( - (ret.is_ok() && !conn.inner.lock().conn.is_autocommit()) - || (matches!(ret, Err(Error::RusqliteErrorExtended(_, 5))) - && conn.inner.lock().conn.is_autocommit()) - ); - }); - } - - tokio::time::pause(); - - while let Some(ret) = set.join_next().await { - assert!(ret.is_ok()); - // advance time by a bit more than the txn timeout - tokio::time::advance(TXN_TIMEOUT + Duration::from_millis(100)).await; - } - } - - #[tokio::test] - /// verify that releasing a txn before the timeout - async fn release_before_timeout() { - let tmp = tempdir().unwrap(); - let make_conn = MakeLibSqlConn::new( - tmp.path().into(), - PassthroughWalWrapper, - Default::default(), - Default::default(), - MetaStoreHandle::load(tmp.path()).unwrap(), - Arc::new([]), - 100000000, - 100000000, - DEFAULT_AUTO_CHECKPOINT, - watch::channel(None).1, - None, - Default::default(), - Arc::new(|_| unreachable!()), - Arc::new(|| EitherWAL::A(Sqlite3WalManager::default())), - ) - .await - .unwrap(); - - let conn1 = make_conn.make_connection().await.unwrap(); - tokio::task::spawn_blocking({ - let conn = conn1.clone(); - move || { - let builder = 
Connection::run( - conn.inner.clone(), - Program::seq(&["BEGIN IMMEDIATE"]), - TestBuilder::default(), - ) - .unwrap() - .0; - assert!(!conn.inner.lock().is_autocommit()); - assert!(builder.into_ret()[0].is_ok()); - } - }) - .await - .unwrap(); - - let conn2 = make_conn.make_connection().await.unwrap(); - let handle = tokio::task::spawn_blocking({ - let conn = conn2.clone(); - move || { - let before = Instant::now(); - let builder = Connection::run( - conn.inner.clone(), - Program::seq(&["BEGIN IMMEDIATE"]), - TestBuilder::default(), - ) - .unwrap() - .0; - assert!(!conn.inner.lock().is_autocommit()); - assert!(builder.into_ret()[0].is_ok()); - before.elapsed() - } - }); - - let wait_time = TXN_TIMEOUT / 10; - tokio::time::sleep(wait_time).await; - - tokio::task::spawn_blocking({ - let conn = conn1.clone(); - move || { - let builder = Connection::run( - conn.inner.clone(), - Program::seq(&["COMMIT"]), - TestBuilder::default(), - ) - .unwrap() - .0; - assert!(conn.inner.lock().is_autocommit()); - assert!(builder.into_ret()[0].is_ok()); - } - }) - .await - .unwrap(); - - let elapsed = handle.await.unwrap(); - - let epsilon = Duration::from_millis(100); - assert!((wait_time..wait_time + epsilon).contains(&elapsed)); - } - - /// The goal of this test is to run many concurrent transaction and hopefully catch a bug in - /// the lock stealing code. If this test becomes flaky check out the lock stealing code. 
- #[tokio::test] - async fn test_many_concurrent() { - let tmp = tempdir().unwrap(); - let make_conn = MakeLibSqlConn::new( - tmp.path().into(), - PassthroughWalWrapper, - Default::default(), - Default::default(), - MetaStoreHandle::load(tmp.path()).unwrap(), - Arc::new([]), - 100000000, - 100000000, - DEFAULT_AUTO_CHECKPOINT, - watch::channel(None).1, - None, - Default::default(), - Arc::new(|_| unreachable!()), - Arc::new(|| EitherWAL::A(Sqlite3WalManager::default())), - ) - .await - .unwrap(); - - let conn = make_conn.make_connection().await.unwrap(); - let (maker, manager) = metastore_connection_maker(None, tmp.path()).await.unwrap(); - let ctx = RequestContext::new( - Authenticated::FullAccess, - NamespaceName::default(), - MetaStore::new(Default::default(), tmp.path(), maker().unwrap(), manager) - .await - .unwrap(), - ); - conn.execute_program( - Program::seq(&["CREATE TABLE test (x)"]), - ctx.clone(), - TestBuilder::default(), - None, - ) - .await - .unwrap(); - let run_conn = |maker: Arc>| { - let ctx = ctx.clone(); - async move { - for _ in 0..1000 { - let conn = maker.make_connection().await.unwrap(); - let pgm = Program::seq(&["BEGIN IMMEDIATE", "INSERT INTO test VALUES (42)"]); - let res = conn - .execute_program(pgm, ctx.clone(), TestBuilder::default(), None) - .await - .unwrap() - .into_ret(); - for result in res { - result.unwrap(); - } - // with 99% change, commit the txn - if rand::thread_rng().gen_range(0..100) > 1 { - let pgm = Program::seq(&["INSERT INTO test VALUES (43)", "COMMIT"]); - let res = conn - .execute_program(pgm, ctx.clone(), TestBuilder::default(), None) - .await - .unwrap() - .into_ret(); - for result in res { - result.unwrap(); - } - } - } - } - }; - - let maker = Arc::new(make_conn); - let mut join_set = JoinSet::new(); - for _ in 0..3 { - join_set.spawn(run_conn(maker.clone())); - } - - let join_all = async move { - while let Some(next) = join_set.join_next().await { - next.unwrap(); - } - }; - - 
tokio::time::timeout(Duration::from_secs(60), join_all) - .await - .expect("timed out running connections"); - } - - #[tokio::test] - /// verify that releasing a txn before the timeout - async fn force_rollback_reset() { - let tmp = tempdir().unwrap(); - let make_conn = MakeLibSqlConn::new( - tmp.path().into(), - PassthroughWalWrapper, - Default::default(), - Default::default(), - MetaStoreHandle::load(tmp.path()).unwrap(), - Arc::new([]), - 100000000, - 100000000, - DEFAULT_AUTO_CHECKPOINT, - watch::channel(None).1, - None, - Default::default(), - Arc::new(|_| unreachable!()), - Arc::new(|| EitherWAL::A(Sqlite3WalManager::default())), - ) - .await - .unwrap(); - let conn1 = make_conn.make_connection().await.unwrap(); - tokio::task::spawn_blocking({ - let conn = conn1.clone(); - move || { - let builder = Connection::run( - conn.inner.clone(), - Program::seq(&["BEGIN IMMEDIATE"]), - TestBuilder::default(), - ) - .unwrap() - .0; - assert!(!conn.inner.lock().is_autocommit()); - assert!(builder.into_ret()[0].is_ok()); - } - }) - .await - .unwrap(); - - let conn2 = make_conn.make_connection().await.unwrap(); - tokio::task::spawn_blocking({ - let conn = conn2.clone(); - move || { - let before = Instant::now(); - let builder = Connection::run( - conn.inner.clone(), - Program::seq(&["BEGIN IMMEDIATE"]), - TestBuilder::default(), - ) - .unwrap() - .0; - assert!(!conn.inner.lock().is_autocommit()); - assert!(builder.into_ret()[0].is_ok()); - before.elapsed() - } - }) - .await - .unwrap(); - - tokio::time::sleep(TXN_TIMEOUT * 2).await; - - tokio::task::spawn_blocking({ - let conn = conn1.clone(); - move || { - let builder = Connection::run( - conn.inner.clone(), - Program::seq(&["SELECT 1;"]), - TestBuilder::default(), - ) - .unwrap() - .0; - assert!(conn.inner.lock().is_autocommit()); - // timeout - assert!(builder.into_ret()[0].is_err()); - - let builder = Connection::run( - conn.inner.clone(), - Program::seq(&["SELECT 1;"]), - TestBuilder::default(), - ) - .unwrap() - .0; 
- assert!(conn.inner.lock().is_autocommit()); - // state reset - assert!(builder.into_ret()[0].is_ok()); - } - }) - .await - .unwrap(); + fn with_raw(&self, f: impl FnOnce(&mut rusqlite::Connection) -> R) -> R { + let mut inner = self.inner.lock(); + f(inner.raw_mut()) } } diff --git a/libsql-server/src/connection/mod.rs b/libsql-server/src/connection/mod.rs index e554130a80..65be8b2533 100644 --- a/libsql-server/src/connection/mod.rs +++ b/libsql-server/src/connection/mod.rs @@ -26,9 +26,11 @@ use self::program::{Cond, DescribeResponse, Program, Step}; pub mod config; pub mod connection_manager; pub mod dump; -pub mod libsql; +pub mod legacy; pub mod program; pub mod write_proxy; +pub mod libsql; +mod connection_core; #[cfg(not(test))] const TXN_TIMEOUT: Duration = Duration::from_secs(5); @@ -169,6 +171,8 @@ pub trait Connection: Send + Sync + 'static { async fn vacuum_if_needed(&self) -> Result<()>; fn diagnostics(&self) -> String; + + fn with_raw(&self, f: impl FnOnce(&mut rusqlite::Connection) -> R) -> R; } fn make_batch_program(batch: Vec) -> Vec { @@ -444,6 +448,10 @@ impl Connection for TrackedConnection { fn diagnostics(&self) -> String { self.inner.diagnostics() } + + fn with_raw(&self, f: impl FnOnce(&mut rusqlite::Connection) -> R) -> R { + self.inner.with_raw(f) + } } #[cfg(test)] @@ -489,6 +497,10 @@ pub mod test { fn diagnostics(&self) -> String { "dummy".into() } + + fn with_raw(&self, _f: impl FnOnce(&mut rusqlite::Connection) -> R) -> R { + todo!() + } } #[tokio::test] diff --git a/libsql-server/src/connection/program.rs b/libsql-server/src/connection/program.rs index f128c7538f..785a7b894b 100644 --- a/libsql-server/src/connection/program.rs +++ b/libsql-server/src/connection/program.rs @@ -1,3 +1,4 @@ +use std::sync::Arc; use std::time::{Duration, Instant}; use metrics::{histogram, increment_counter}; @@ -14,14 +15,14 @@ use crate::query_result_builder::QueryResultBuilder; use super::config::DatabaseConfig; use super::RequestContext; 
-#[derive(Debug, serde::Serialize, serde::Deserialize)] +#[derive(Debug, serde::Serialize, serde::Deserialize, Clone)] pub struct Program { - pub steps: Vec, + pub steps: Arc>, } impl Program { pub fn new(steps: Vec) -> Self { - Self { steps } + Self { steps: steps.into() } } pub fn is_read_only(&self) -> bool { @@ -29,7 +30,11 @@ impl Program { } pub fn steps(&self) -> &[Step] { - self.steps.as_slice() + &self.steps + } + + pub fn steps_mut(&mut self) -> Option<&mut Vec> { + Arc::get_mut(&mut self.steps) } #[cfg(test)] diff --git a/libsql-server/src/connection/write_proxy.rs b/libsql-server/src/connection/write_proxy.rs index 6e66dce37b..1531206637 100644 --- a/libsql-server/src/connection/write_proxy.rs +++ b/libsql-server/src/connection/write_proxy.rs @@ -1,5 +1,3 @@ -use std::path::PathBuf; -use std::sync::atomic::AtomicBool; use std::sync::Arc; use futures_core::future::BoxFuture; @@ -8,7 +6,6 @@ use libsql_replication::rpc::proxy::proxy_client::ProxyClient; use libsql_replication::rpc::proxy::{ exec_req, exec_resp, ExecReq, ExecResp, StreamDescribeReq, StreamProgramReq, }; -use libsql_sys::wal::wrapper::PassthroughWalWrapper; use libsql_sys::EncryptionConfig; use parking_lot::Mutex as PMutex; use tokio::sync::{mpsc, watch, Mutex}; @@ -19,72 +16,45 @@ use tonic::{Request, Streaming}; use crate::connection::program::{DescribeCol, DescribeParam}; use crate::error::Error; use crate::metrics::{REPLICA_LOCAL_EXEC_MISPREDICT, REPLICA_LOCAL_PROGRAM_EXEC}; -use crate::namespace::broadcasters::BroadcasterHandle; -use crate::namespace::meta_store::MetaStoreHandle; -use crate::namespace::ResolveNamespacePathFn; use crate::query_analysis::TxnStatus; use crate::query_result_builder::{QueryBuilderConfig, QueryResultBuilder}; use crate::replication::FrameNo; use crate::stats::Stats; use crate::{Result, DEFAULT_AUTO_CHECKPOINT}; -use super::connection_manager::InnerWalManager; -use super::libsql::{LibSqlConnection, MakeLibSqlConn}; use super::program::DescribeResponse; use 
super::{Connection, RequestContext}; use super::{MakeConnection, Program}; pub type RpcStream = Streaming; -pub struct MakeWriteProxyConn { +pub struct MakeWriteProxyConn { client: ProxyClient, stats: Arc, applied_frame_no_receiver: watch::Receiver>, max_response_size: u64, max_total_response_size: u64, primary_replication_index: Option, - make_read_only_conn: MakeLibSqlConn, + // make_read_only_conn: MakeLegacyConnection, + make_read_only_conn: M, encryption_config: Option, } -impl MakeWriteProxyConn { +impl MakeWriteProxyConn { #[allow(clippy::too_many_arguments)] - pub async fn new( - db_path: PathBuf, - extensions: Arc<[PathBuf]>, + pub fn new( channel: Channel, uri: tonic::transport::Uri, stats: Arc, - broadcaster: BroadcasterHandle, - config_store: MetaStoreHandle, applied_frame_no_receiver: watch::Receiver>, max_response_size: u64, max_total_response_size: u64, primary_replication_index: Option, encryption_config: Option, - resolve_attach_path: ResolveNamespacePathFn, - make_wal_manager: Arc InnerWalManager + Send + Sync + 'static>, - ) -> crate::Result { + make_read_only_conn: M, + ) -> Self { let client = ProxyClient::with_origin(channel, uri); - let make_read_only_conn = MakeLibSqlConn::new( - db_path.clone(), - PassthroughWalWrapper, - stats.clone(), - broadcaster, - config_store.clone(), - extensions.clone(), - max_response_size, - max_total_response_size, - DEFAULT_AUTO_CHECKPOINT, - applied_frame_no_receiver.clone(), - encryption_config.clone(), - Arc::new(AtomicBool::new(false)), // this is always false for write proxy - resolve_attach_path, - make_wal_manager, - ) - .await?; - - Ok(Self { + Self { client, stats, applied_frame_no_receiver, @@ -93,13 +63,15 @@ impl MakeWriteProxyConn { make_read_only_conn, primary_replication_index, encryption_config, - }) + } } } #[async_trait::async_trait] -impl MakeConnection for MakeWriteProxyConn { - type Connection = WriteProxyConnection; +impl MakeConnection for MakeWriteProxyConn +where M: MakeConnection, +{ + 
type Connection = WriteProxyConnection; async fn create(&self) -> Result { Ok(WriteProxyConnection::new( self.client.clone(), @@ -117,9 +89,9 @@ impl MakeConnection for MakeWriteProxyConn { } } -pub struct WriteProxyConnection { +pub struct WriteProxyConnection { /// Lazily initialized read connection - read_conn: LibSqlConnection, + read_conn: C, write_proxy: ProxyClient, state: Mutex, /// FrameNo of the last write performed by this connection on the primary. @@ -136,7 +108,7 @@ pub struct WriteProxyConnection { primary_replication_index: Option, } -impl WriteProxyConnection { +impl WriteProxyConnection { #[allow(clippy::too_many_arguments)] fn new( write_proxy: ProxyClient, @@ -144,7 +116,7 @@ impl WriteProxyConnection { applied_frame_no_receiver: watch::Receiver>, builder_config: QueryBuilderConfig, primary_replication_index: Option, - read_conn: LibSqlConnection, + read_conn: C, ) -> Result { Ok(Self { read_conn, @@ -190,7 +162,7 @@ impl WriteProxyConnection { *status = TxnStatus::Invalid; let res = self .with_remote_conn(ctx, self.builder_config.clone(), |conn| { - Box::pin(conn.execute(pgm, builder)) + Box::pin(conn.execute(pgm.clone(), builder)) }) .await; @@ -452,7 +424,7 @@ where } #[async_trait::async_trait] -impl Connection for WriteProxyConnection { +impl Connection for WriteProxyConnection { async fn execute_program( &self, pgm: Program, @@ -471,7 +443,9 @@ impl Connection for WriteProxyConnection { // We know that this program won't perform any writes. We attempt to run it on the // replica. If it leaves an open transaction, then this program is an interactive // transaction, so we rollback the replica, and execute again on the primary. - let (builder, pgm) = self.read_conn.execute(pgm, ctx.clone(), builder).await?; + let builder = self + .read_conn + .execute_program(pgm.clone(), ctx.clone(), builder, replication_index).await?; if !self.read_conn.is_autocommit().await? 
{ REPLICA_LOCAL_EXEC_MISPREDICT.increment(1); self.read_conn.rollback(ctx.clone()).await?; @@ -517,6 +491,10 @@ impl Connection for WriteProxyConnection { fn diagnostics(&self) -> String { format!("{:?}", self.state) } + + fn with_raw(&self, _f: impl FnOnce(&mut rusqlite::Connection) -> R) -> R { + panic!("no raw connection for write proxy"); + } } #[cfg(test)] diff --git a/libsql-server/src/database/libsql_primary.rs b/libsql-server/src/database/libsql_primary.rs new file mode 100644 index 0000000000..5e9e530954 --- /dev/null +++ b/libsql-server/src/database/libsql_primary.rs @@ -0,0 +1,23 @@ +use std::sync::Arc; +use std::sync::atomic::AtomicBool; + +use crate::connection::libsql::{LibsqlConnection, MakeLibsqlConnection}; +use crate::connection::{MakeThrottledConnection, TrackedConnection}; + +pub type LibsqlPrimaryConnection = TrackedConnection; +pub type LibsqlPrimaryConnectionMaker = MakeThrottledConnection; + +pub struct LibsqlPrimaryDatabase { + pub connection_maker: Arc, + pub block_writes: Arc, +} + +impl LibsqlPrimaryDatabase { + pub fn connection_maker(&self) -> Arc { + self.connection_maker.clone() + } + + pub fn destroy(self) { } + + pub async fn shutdown(self) -> anyhow::Result<()> { Ok(()) } +} diff --git a/libsql-server/src/database/libsql_replica.rs b/libsql-server/src/database/libsql_replica.rs new file mode 100644 index 0000000000..86435be292 --- /dev/null +++ b/libsql-server/src/database/libsql_replica.rs @@ -0,0 +1,31 @@ +use std::sync::Arc; + +use libsql_replication::rpc::proxy::ExecResp; +use tonic::Streaming; + +use crate::connection::libsql::{LibsqlConnection, MakeLibsqlConnection}; +use crate::connection::write_proxy::{MakeWriteProxyConn, WriteProxyConnection}; +use crate::connection::{MakeThrottledConnection, TrackedConnection}; + +use super::Result; + +pub type LibsqlReplicaConnection = TrackedConnection< + WriteProxyConnection, LibsqlConnection>, +>; +type LibsqlReplicaConnectionMaker = MakeThrottledConnection>; + +pub struct 
LibsqlReplicaDatabase { + pub connection_maker: Arc, +} + +impl LibsqlReplicaDatabase { + pub fn connection_maker(&self) -> Arc { + self.connection_maker.clone() + } + + pub fn destroy(self) {} + + pub async fn shutdown(self) -> Result<()> { + Ok(()) + } +} diff --git a/libsql-server/src/database/mod.rs b/libsql-server/src/database/mod.rs index 5af73decc0..e6d59d3998 100644 --- a/libsql-server/src/database/mod.rs +++ b/libsql-server/src/database/mod.rs @@ -1,4 +1,5 @@ use std::fmt; +use std::sync::atomic::AtomicBool; use std::sync::Arc; use bottomless::replicator::Replicator; @@ -7,6 +8,8 @@ use tokio::sync::watch; use crate::connection::{MakeConnection, RequestContext}; use crate::replication::{FrameNo, ReplicationLogger}; +pub use self::libsql_replica::{LibsqlReplicaConnection, LibsqlReplicaDatabase}; +pub use self::libsql_primary::{LibsqlPrimaryConnection, LibsqlPrimaryDatabase, LibsqlPrimaryConnectionMaker}; pub use self::primary::{PrimaryConnection, PrimaryConnectionMaker, PrimaryDatabase}; pub use self::replica::{ReplicaConnection, ReplicaDatabase}; pub use self::schema::{SchemaConnection, SchemaDatabase}; @@ -14,6 +17,8 @@ pub use self::schema::{SchemaConnection, SchemaDatabase}; mod primary; mod replica; mod schema; +mod libsql_primary; +mod libsql_replica; #[derive(Debug, Clone, serde::Deserialize, Copy)] #[serde(rename_all = "snake_case")] @@ -45,7 +50,10 @@ pub type Result = anyhow::Result; pub enum Connection { Primary(PrimaryConnection), Replica(ReplicaConnection), - Schema(SchemaConnection), + Schema(SchemaConnection), + LibsqlPrimary(LibsqlPrimaryConnection), + LibsqlReplica(LibsqlReplicaConnection), + LibsqlSchema(SchemaConnection), } impl fmt::Debug for Connection { @@ -54,6 +62,9 @@ impl fmt::Debug for Connection { Self::Primary(_) => write!(f, "Primary"), Self::Replica(_) => write!(f, "Replica"), Self::Schema(_) => write!(f, "Schema"), + Self::LibsqlPrimary(_) => write!(f, "LibsqlPrimaryConnection"), + Self::LibsqlReplica(_) => write!(f, 
"LibsqlReplicaConnection"), + Self::LibsqlSchema(_) => write!(f, "LibsqlSchema"), } } } @@ -64,7 +75,7 @@ impl Connection { /// [`Primary`]: Connection::Primary #[must_use] pub fn is_primary(&self) -> bool { - matches!(self, Self::Primary(..)) + matches!(self, Self::Primary(..) | Self::LibsqlPrimary(_)) } } @@ -90,6 +101,18 @@ impl crate::connection::Connection for Connection { conn.execute_program(pgm, ctx, response_builder, replication_index) .await } + Connection::LibsqlPrimary(conn) => { + conn.execute_program(pgm, ctx, response_builder, replication_index) + .await + } + Connection::LibsqlReplica(conn) => { + conn.execute_program(pgm, ctx, response_builder, replication_index) + .await + } + Connection::LibsqlSchema(conn) => { + conn.execute_program(pgm, ctx, response_builder, replication_index) + .await + } } } @@ -103,6 +126,9 @@ impl crate::connection::Connection for Connection { Connection::Primary(conn) => conn.describe(sql, ctx, replication_index).await, Connection::Replica(conn) => conn.describe(sql, ctx, replication_index).await, Connection::Schema(conn) => conn.describe(sql, ctx, replication_index).await, + Connection::LibsqlPrimary(conn) => conn.describe(sql, ctx, replication_index).await, + Connection::LibsqlReplica(conn) => conn.describe(sql, ctx, replication_index).await, + Connection::LibsqlSchema(conn) => conn.describe(sql, ctx, replication_index).await, } } @@ -111,6 +137,9 @@ impl crate::connection::Connection for Connection { Connection::Primary(conn) => conn.is_autocommit().await, Connection::Replica(conn) => conn.is_autocommit().await, Connection::Schema(conn) => conn.is_autocommit().await, + Connection::LibsqlPrimary(conn) => conn.is_autocommit().await, + Connection::LibsqlReplica(conn) => conn.is_autocommit().await, + Connection::LibsqlSchema(conn) => conn.is_autocommit().await, } } @@ -119,6 +148,9 @@ impl crate::connection::Connection for Connection { Connection::Primary(conn) => conn.checkpoint().await, Connection::Replica(conn) => 
conn.checkpoint().await, Connection::Schema(conn) => conn.checkpoint().await, + Connection::LibsqlPrimary(conn) => conn.checkpoint().await, + Connection::LibsqlReplica(conn) => conn.checkpoint().await, + Connection::LibsqlSchema(conn) => conn.checkpoint().await, } } @@ -127,6 +159,9 @@ impl crate::connection::Connection for Connection { Connection::Primary(conn) => conn.vacuum_if_needed().await, Connection::Replica(conn) => conn.vacuum_if_needed().await, Connection::Schema(conn) => conn.vacuum_if_needed().await, + Connection::LibsqlPrimary(conn) => conn.vacuum_if_needed().await, + Connection::LibsqlReplica(conn) => conn.vacuum_if_needed().await, + Connection::LibsqlSchema(conn) => conn.vacuum_if_needed().await, } } @@ -135,6 +170,20 @@ impl crate::connection::Connection for Connection { Connection::Primary(conn) => conn.diagnostics(), Connection::Replica(conn) => conn.diagnostics(), Connection::Schema(conn) => conn.diagnostics(), + Connection::LibsqlPrimary(conn) => conn.diagnostics(), + Connection::LibsqlReplica(conn) => conn.diagnostics(), + Connection::LibsqlSchema(conn) => conn.diagnostics(), + } + } + + fn with_raw(&self, f: impl FnOnce(&mut rusqlite::Connection) -> R) -> R { + match self { + Connection::Primary(c) => c.with_raw(f), + Connection::Replica(c) => c.with_raw(f), + Connection::Schema(c) => c.with_raw(f), + Connection::LibsqlPrimary(c) => c.with_raw(f), + Connection::LibsqlReplica(c) => c.with_raw(f), + Connection::LibsqlSchema(c) => c.with_raw(f), } } } @@ -142,7 +191,10 @@ impl crate::connection::Connection for Connection { pub enum Database { Primary(PrimaryDatabase), Replica(ReplicaDatabase), - Schema(SchemaDatabase), + Schema(SchemaDatabase), + LibsqlPrimary(LibsqlPrimaryDatabase), + LibsqlReplica(LibsqlReplicaDatabase), + LibsqlSchema(SchemaDatabase), } impl fmt::Debug for Database { @@ -150,7 +202,10 @@ impl fmt::Debug for Database { match self { Self::Primary(_) => write!(f, "Primary"), Self::Replica(_) => write!(f, "Replica"), - 
Database::Schema(_) => write!(f, "Schema"), + Self::Schema(_) => write!(f, "Schema"), + Self::LibsqlPrimary(_) => write!(f, "LibsqlPrimary"), + Self::LibsqlReplica(_) => write!(f, "LibsqlReplica"), + Self::LibsqlSchema(_) => write!(f, "LibsqlSchema"), } } } @@ -161,6 +216,9 @@ impl Database { Database::Primary(db) => Arc::new(db.connection_maker().map(Connection::Primary)), Database::Replica(db) => Arc::new(db.connection_maker().map(Connection::Replica)), Database::Schema(db) => Arc::new(db.connection_maker().map(Connection::Schema)), + Database::LibsqlPrimary(db) => Arc::new(db.connection_maker().map(Connection::LibsqlPrimary)), + Database::LibsqlReplica(db) => Arc::new(db.connection_maker().map(Connection::LibsqlReplica)), + Database::LibsqlSchema(db) => Arc::new(db.connection_maker().map(Connection::LibsqlSchema)), } } @@ -169,6 +227,9 @@ impl Database { Database::Primary(db) => db.destroy(), Database::Replica(db) => db.destroy(), Database::Schema(db) => db.destroy(), + Database::LibsqlPrimary(db) => db.destroy(), + Database::LibsqlReplica(db) => db.destroy(), + Database::LibsqlSchema(db) => db.destroy(), } } @@ -177,6 +238,9 @@ impl Database { Database::Primary(db) => db.shutdown().await, Database::Replica(db) => db.shutdown().await, Database::Schema(db) => db.shutdown().await, + Database::LibsqlPrimary(db) => db.shutdown().await, + Database::LibsqlReplica(db) => db.shutdown().await, + Database::LibsqlSchema(db) => db.shutdown().await, } } @@ -184,7 +248,18 @@ impl Database { match self { Database::Primary(p) => Some(p.wal_wrapper.wrapper().logger()), Database::Replica(_) => None, - Database::Schema(s) => Some(s.wal_wrapper.wrapper().logger()), + Database::Schema(s) => Some(s.wal_wrapper.as_ref().unwrap().wrapper().logger()), + Database::LibsqlPrimary(_) => None, + Database::LibsqlReplica(_) => None, + Database::LibsqlSchema(s) => Some(s.wal_wrapper.as_ref().unwrap().wrapper().logger()), + } + } + + pub fn block_writes(&self) -> Option> { + match self { + 
Self::Primary(p) => Some(p.block_writes.clone()), + Self::LibsqlPrimary(p) => Some(p.block_writes.clone()), + _ => None, } } @@ -199,28 +274,32 @@ impl Database { ), Database::Replica(_) => None, Database::Schema(s) => Some( - s.wal_wrapper - .wrapper() - .logger() - .new_frame_notifier - .subscribe(), + s + .wal_wrapper + .as_ref() + .unwrap() + .wrapper() + .logger() + .new_frame_notifier + .subscribe(), ), + Database::LibsqlPrimary(_) => todo!(), + Database::LibsqlReplica(_) => todo!(), + Database::LibsqlSchema(_) => todo!(), } } - pub fn as_primary(&self) -> Option<&PrimaryDatabase> { - if let Self::Primary(v) = self { - Some(v) - } else { - None + pub fn is_primary(&self) -> bool { + match self { + Self::LibsqlPrimary(_) | Self::Primary(_) => true, + _ => false, } } - pub(crate) fn as_schema(&self) -> Option<&SchemaDatabase> { - if let Self::Schema(v) = self { - Some(v) - } else { - None + pub(crate) fn is_schema(&self) -> bool { + match self { + Self::Schema(_) => true, + _ => false, } } @@ -229,6 +308,9 @@ impl Database { Database::Primary(db) => db.replicator(), Database::Replica(_) => None, Database::Schema(db) => db.replicator(), + Database::LibsqlPrimary(_) => None, + Database::LibsqlReplica(_) => None, + Database::LibsqlSchema(_) => None, } } } diff --git a/libsql-server/src/database/primary.rs b/libsql-server/src/database/primary.rs index 95f146684f..c8c9ac9890 100644 --- a/libsql-server/src/database/primary.rs +++ b/libsql-server/src/database/primary.rs @@ -1,14 +1,14 @@ use std::sync::atomic::AtomicBool; use std::sync::Arc; -use crate::connection::libsql::{LibSqlConnection, MakeLibSqlConn}; +use crate::connection::legacy::{LegacyConnection, MakeLegacyConnection}; use crate::connection::{MakeThrottledConnection, TrackedConnection}; use crate::namespace::replication_wal::ReplicationWalWrapper; use super::Result; -pub type PrimaryConnection = TrackedConnection>; -pub type PrimaryConnectionMaker = MakeThrottledConnection>; +pub type PrimaryConnection = 
TrackedConnection>; +pub type PrimaryConnectionMaker = MakeThrottledConnection>; pub struct PrimaryDatabase { pub wal_wrapper: ReplicationWalWrapper, diff --git a/libsql-server/src/database/replica.rs b/libsql-server/src/database/replica.rs index c559d68a58..3ffa12587d 100644 --- a/libsql-server/src/database/replica.rs +++ b/libsql-server/src/database/replica.rs @@ -1,15 +1,19 @@ use std::sync::Arc; use libsql_replication::rpc::proxy::ExecResp; +use libsql_sys::wal::wrapper::PassthroughWalWrapper; use tonic::Streaming; +use crate::connection::legacy::{LegacyConnection, MakeLegacyConnection}; use crate::connection::write_proxy::{MakeWriteProxyConn, WriteProxyConnection}; use crate::connection::{MakeThrottledConnection, TrackedConnection}; use super::Result; -pub type ReplicaConnection = TrackedConnection>>; -type ReplicaConnectionMaker = MakeThrottledConnection; +pub type ReplicaConnection = TrackedConnection< + WriteProxyConnection, LegacyConnection>, +>; +type ReplicaConnectionMaker = MakeThrottledConnection>>; pub struct ReplicaDatabase { pub connection_maker: Arc, diff --git a/libsql-server/src/database/schema.rs b/libsql-server/src/database/schema.rs index 0b9674bd60..182247fe63 100644 --- a/libsql-server/src/database/schema.rs +++ b/libsql-server/src/database/schema.rs @@ -10,24 +10,21 @@ use crate::namespace::NamespaceName; use crate::query_result_builder::QueryBuilderConfig; use crate::schema::{perform_migration, validate_migration, MigrationJobStatus, SchedulerHandle}; -use super::primary::PrimaryConnectionMaker; -use super::PrimaryConnection; - -pub struct SchemaConnection { +pub struct SchemaConnection { migration_scheduler: SchedulerHandle, schema: NamespaceName, - connection: Arc, + connection: Arc, config: MetaStoreHandle, } -impl SchemaConnection { - pub(crate) fn connection(&self) -> &PrimaryConnection { +impl SchemaConnection { + pub(crate) fn connection(&self) -> &C { &self.connection } } #[async_trait::async_trait] -impl 
crate::connection::Connection for SchemaConnection { +impl crate::connection::Connection for SchemaConnection { async fn execute_program( &self, mut migration: Program, @@ -140,20 +137,35 @@ impl crate::connection::Connection for SchemaConnection { fn diagnostics(&self) -> String { self.connection.diagnostics() } + + fn with_raw(&self, f: impl FnOnce(&mut rusqlite::Connection) -> R) -> R { + self.connection().with_raw(f) + } } -#[derive(Clone)] -pub struct SchemaDatabase { +pub struct SchemaDatabase { migration_scheduler: SchedulerHandle, schema: NamespaceName, - connection_maker: Arc, - pub wal_wrapper: ReplicationWalWrapper, + connection_maker: Arc, + pub wal_wrapper: Option, config: MetaStoreHandle, } +impl Clone for SchemaDatabase { + fn clone(&self) -> Self { + Self { + migration_scheduler: self.migration_scheduler.clone(), + schema: self.schema.clone(), + connection_maker: self.connection_maker.clone(), + wal_wrapper: self.wal_wrapper.clone(), + config: self.config.clone(), + } + } +} + #[async_trait::async_trait] -impl MakeConnection for SchemaDatabase { - type Connection = SchemaConnection; +impl MakeConnection for SchemaDatabase { + type Connection = SchemaConnection; async fn create(&self) -> crate::Result { let connection = Arc::new(self.connection_maker.create().await?); @@ -166,16 +178,16 @@ impl MakeConnection for SchemaDatabase { } } -impl SchemaDatabase { +impl SchemaDatabase { pub fn new( migration_scheduler: SchedulerHandle, schema: NamespaceName, - connection_maker: PrimaryConnectionMaker, - wal_wrapper: ReplicationWalWrapper, + connection_maker: Arc, + wal_wrapper: Option, config: MetaStoreHandle, ) -> Self { Self { - connection_maker: connection_maker.into(), + connection_maker, migration_scheduler, schema, wal_wrapper, @@ -184,16 +196,18 @@ impl SchemaDatabase { } pub(crate) async fn shutdown(self) -> Result<(), anyhow::Error> { - self.wal_wrapper - .wrapper() - .logger() - .closed_signal - .send_replace(true); - let wal_manager = 
self.wal_wrapper; - - if let Some(maybe_replicator) = wal_manager.wrapped().as_ref() { - if let Some(mut replicator) = maybe_replicator.shutdown().await { - replicator.shutdown_gracefully().await?; + if let Some(wrapper) = self.wal_wrapper { + wrapper + .wrapper() + .logger() + .closed_signal + .send_replace(true); + let wal_manager = wrapper; + + if let Some(maybe_replicator) = wal_manager.wrapped().as_ref() { + if let Some(mut replicator) = maybe_replicator.shutdown().await { + replicator.shutdown_gracefully().await?; + } } } @@ -201,11 +215,13 @@ impl SchemaDatabase { } pub(crate) fn destroy(&self) { - self.wal_wrapper - .wrapper() - .logger() - .closed_signal - .send_replace(true); + if let Some(ref wrapper) = self.wal_wrapper { + wrapper + .wrapper() + .logger() + .closed_signal + .send_replace(true); + } } pub(crate) fn connection_maker(&self) -> Self { @@ -215,8 +231,10 @@ impl SchemaDatabase { pub(crate) fn replicator( &self, ) -> Option>>> { - if let Some(wal) = self.wal_wrapper.wrapped() { - return Some(wal.replicator()); + if let Some(ref wrapper) = self.wal_wrapper { + if let Some(wal) = wrapper.wrapped() { + return Some(wal.replicator()); + } } None } diff --git a/libsql-server/src/hrana/batch.rs b/libsql-server/src/hrana/batch.rs index a8cabb1d8e..3fb31e2a8c 100644 --- a/libsql-server/src/hrana/batch.rs +++ b/libsql-server/src/hrana/batch.rs @@ -1,5 +1,6 @@ use anyhow::{anyhow, bail, Result}; use std::collections::HashMap; +use std::sync::Arc; use crate::connection::program::{Cond, Program, Step}; use crate::connection::{Connection, RequestContext}; @@ -139,7 +140,7 @@ pub fn proto_sequence_to_program(sql: &str) -> Result { Step { cond, query } }) .collect(); - Ok(Program { steps }) + Ok(Program { steps: Arc::new(steps) }) } pub async fn execute_sequence( diff --git a/libsql-server/src/http/user/dump.rs b/libsql-server/src/http/user/dump.rs index ec3486f9a3..41f88bcff6 100644 --- a/libsql-server/src/http/user/dump.rs +++ 
b/libsql-server/src/http/user/dump.rs @@ -10,7 +10,7 @@ use serde::Deserialize; use crate::auth::Authenticated; use crate::connection::dump::exporter::export_dump; -use crate::connection::MakeConnection; +use crate::connection::Connection as _; use crate::error::Error; use crate::BLOCKING_RT; @@ -98,7 +98,8 @@ pub(super) async fn handle_dump( let conn_maker = state .namespaces .with(namespace, |ns| { - ns.db.as_primary().unwrap().connection_maker() + assert!(ns.db.is_primary()); + ns.db.connection_maker() }) .await .unwrap(); diff --git a/libsql-server/src/http/user/mod.rs b/libsql-server/src/http/user/mod.rs index 2432370aab..17ca0e2ee9 100644 --- a/libsql-server/src/http/user/mod.rs +++ b/libsql-server/src/http/user/mod.rs @@ -22,6 +22,7 @@ use axum_extra::middleware::option_layer; use base64::prelude::BASE64_STANDARD_NO_PAD; use base64::Engine; use hyper::{header, Body, Request, Response, StatusCode}; +use libsql_replication::rpc::replication::replication_log_server::{ReplicationLog, ReplicationLogServer}; use serde::de::DeserializeOwned; use serde::Serialize; use serde_json::Number; @@ -47,8 +48,6 @@ use crate::query::{self, Query}; use crate::query_analysis::{predict_final_state, Statement, TxnStatus}; use crate::query_result_builder::QueryResultBuilder; use crate::rpc::proxy::rpc::proxy_server::{Proxy, ProxyServer}; -use crate::rpc::replication_log::rpc::replication_log_server::ReplicationLog; -use crate::rpc::ReplicationLogServer; use crate::schema::{MigrationDetails, MigrationSummary}; use crate::utils::services::idle_shutdown::IdleShutdownKicker; use crate::version; diff --git a/libsql-server/src/lib.rs b/libsql-server/src/lib.rs index 4b97b442f5..63a5617e7c 100644 --- a/libsql-server/src/lib.rs +++ b/libsql-server/src/lib.rs @@ -17,14 +17,18 @@ use crate::pager::{make_pager, PAGER_CACHE_SIZE}; use crate::rpc::proxy::rpc::proxy_server::Proxy; use crate::rpc::proxy::ProxyService; use crate::rpc::replica_proxy::ReplicaProxyService; -use 
crate::rpc::replication_log::rpc::replication_log_server::ReplicationLog; -use crate::rpc::replication_log::ReplicationLogService; -use crate::rpc::replication_log_proxy::ReplicationLogProxyService; +use crate::rpc::replication::libsql_replicator::LibsqlReplicationService; +use crate::rpc::replication::replication_log::rpc::replication_log_server::ReplicationLog; +use crate::rpc::replication::replication_log::ReplicationLogService; +use crate::rpc::replication::replication_log_proxy::ReplicationLogProxyService; use crate::rpc::run_rpc_server; use crate::schema::Scheduler; use crate::stats::Stats; use anyhow::Context as AnyhowContext; use auth::Auth; +use aws_config::{BehaviorVersion, Region}; +use aws_sdk_s3::config::{Credentials, SharedCredentialsProvider}; +use aws_smithy_runtime::client::http::hyper_014::HyperClientBuilder; use config::{ AdminApiConfig, DbConfig, HeartbeatConfig, RpcClientConfig, RpcServerConfig, UserApiConfig, }; @@ -34,17 +38,22 @@ use http::user::UserApi; use hyper::client::HttpConnector; use hyper::Uri; use hyper_rustls::HttpsConnector; +use libsql_replication::rpc::replication::BoxReplicationService; #[cfg(feature = "durable-wal")] use libsql_storage::{DurableWalManager, LockManager}; +use libsql_sys::wal::either::Either; #[cfg(not(feature = "durable-wal"))] use libsql_sys::wal::either::Either as EitherWAL; #[cfg(feature = "durable-wal")] use libsql_sys::wal::either::Either3 as EitherWAL; use libsql_sys::wal::Sqlite3WalManager; use libsql_wal::checkpointer::LibsqlCheckpointer; +use libsql_wal::io::StdIO; use libsql_wal::registry::WalRegistry; +use libsql_wal::segment::sealed::SealedSegment; +use libsql_wal::storage::async_storage::{AsyncStorage, AsyncStorageInitConfig}; +use libsql_wal::storage::backend::s3::S3Backend; use libsql_wal::storage::NoStorage; -use libsql_wal::wal::LibsqlWalManager; use namespace::meta_store::MetaStoreHandle; use namespace::NamespaceName; use net::Connector; @@ -62,7 +71,8 @@ use 
utils::services::idle_shutdown::IdleShutdownKicker; use self::config::MetaStoreConfig; use self::connection::connection_manager::InnerWalManager; use self::namespace::configurator::{ - BaseNamespaceConfig, NamespaceConfigurators, PrimaryConfigurator, PrimaryExtraConfig, + BaseNamespaceConfig, LibsqlPrimaryConfigurator, LibsqlReplicaConfigurator, + LibsqlSchemaConfigurator, NamespaceConfigurators, PrimaryConfig, PrimaryConfigurator, ReplicaConfigurator, SchemaConfigurator, }; use self::namespace::NamespaceStore; @@ -114,6 +124,16 @@ pub(crate) static BLOCKING_RT: Lazy = Lazy::new(|| { type Result = std::result::Result; type StatsSender = mpsc::Sender<(NamespaceName, MetaStoreHandle, Weak)>; +type MakeReplicationSvc = Box< + dyn FnOnce( + NamespaceStore, + Option, + Option, + bool, + ) -> BoxReplicationService + + Send + + 'static, +>; // #[global_allocator] // static GLOBAL: mimalloc::MiMalloc = mimalloc::MiMalloc; @@ -148,6 +168,7 @@ pub struct Server, pub storage_server_address: String, + pub connector: Option, } impl Default for Server { @@ -171,6 +192,7 @@ impl Default for Server { shutdown_timeout: Duration::from_secs(30), use_custom_wal: None, storage_server_address: Default::default(), + connector: None, } } } @@ -235,6 +257,9 @@ where } } +pub type SqldStorage = + Either, SealedSegment>, NoStorage>; + #[tracing::instrument(skip(connection_maker))] async fn run_periodic_checkpoint( connection_maker: Arc, @@ -461,8 +486,8 @@ where encryption_config: self.db_config.encryption_config.clone(), }; - let configurators = self - .make_configurators( + let (configurators, make_replication_svc) = self + .make_configurators_and_replication_svc( base_config, client_config.clone(), &mut join_set, @@ -519,6 +544,13 @@ where } }); + let replication_service = make_replication_svc( + namespace_store.clone(), + None, + idle_shutdown_kicker.clone(), + false, + ); + self.spawn_until_shutdown_on( &mut join_set, run_rpc_server( @@ -526,8 +558,7 @@ where config.acceptor, 
config.tls_config, idle_shutdown_kicker.clone(), - namespace_store.clone(), - self.disable_namespaces, + replication_service, ), ); } @@ -648,14 +679,14 @@ where Ok(()) } - async fn make_configurators( + async fn make_configurators_and_replication_svc( &self, base_config: BaseNamespaceConfig, client_config: Option<(Channel, Uri)>, join_set: &mut JoinSet>, migration_scheduler_handle: SchedulerHandle, scripted_backup: Option, - ) -> anyhow::Result { + ) -> anyhow::Result<(NamespaceConfigurators, MakeReplicationSvc)> { let wal_path = base_config.base_path.join("wals"); let enable_libsql_wal_test = { let is_primary = self.rpc_server_config.is_some(); @@ -670,12 +701,10 @@ where } } - if self.use_custom_wal.is_some() { + #[cfg(feature = "durable-wal")] + if let Some(CustomWAL::DurableWal) = self.use_custom_wal { if self.db_config.bottomless_replication.is_some() { - anyhow::bail!("bottomless not supported with custom WAL"); - } - if self.rpc_client_config.is_some() { - anyhow::bail!("custom WAL not supported in replica mode"); + anyhow::bail!("bottomless not supported with durable WAL"); } } @@ -687,7 +716,7 @@ where migration_scheduler_handle, scripted_backup, wal_path, - ), + ).await, #[cfg(feature = "durable-wal")] Some(CustomWAL::DurableWal) => self.durable_wal_configurators( base_config, @@ -707,7 +736,7 @@ where } } - fn libsql_wal_configurators( + async fn libsql_wal_configurators( &self, base_config: BaseNamespaceConfig, client_config: Option<(Channel, Uri)>, @@ -715,17 +744,65 @@ where migration_scheduler_handle: SchedulerHandle, scripted_backup: Option, wal_path: PathBuf, - ) -> anyhow::Result { + ) -> anyhow::Result<(NamespaceConfigurators, MakeReplicationSvc)> { tracing::info!("using libsql wal"); let (sender, receiver) = tokio::sync::mpsc::channel(64); - let registry = Arc::new(WalRegistry::new(wal_path, NoStorage, sender)?); + let storage = if let Some(ref opt) = self.db_config.bottomless_replication { + if client_config.is_some() { + 
anyhow::bail!("bottomless cannot be enabled on replicas"); + } + + let config = aws_config::load_defaults(BehaviorVersion::latest()).await; + let http_client = HyperClientBuilder::new().build(self.connector.clone().unwrap()); + let mut builder = config.into_builder(); + builder.set_http_client(Some(http_client)); + builder.set_endpoint_url(opt.aws_endpoint.clone()); + builder.set_region(Region::new( + opt.region.clone().expect("expected aws region"), + )); + let cred = Credentials::new( + opt.access_key_id.as_ref().unwrap(), + opt.secret_access_key.as_ref().unwrap(), + None, + None, + "", + ); + builder.set_credentials_provider(Some(SharedCredentialsProvider::new(cred))); + let config = builder.build(); + dbg!(&config); + let backend = S3Backend::from_sdk_config( + config, + opt.bucket_name.clone(), + opt.db_id.clone().expect("expected db id") + ).await?; + let config = AsyncStorageInitConfig { + backend: Arc::new(backend), + max_in_flight_jobs: 16, + }; + let (storage, storage_loop) = AsyncStorage::new(config).await; + + join_set.spawn(async move { + storage_loop.run().await; + Ok(()) + }); + + Either::A(storage) + } else { + Either::B(NoStorage) + }; + + if dbg!(self.rpc_server_config.is_some()) && dbg!(matches!(storage, Either::B(_))) { + anyhow::bail!("replication without bottomless not supported yet"); + } + + let registry = Arc::new(WalRegistry::new(wal_path, storage, sender)?); let checkpointer = LibsqlCheckpointer::new(registry.clone(), receiver, 8); self.spawn_until_shutdown_on(join_set, async move { checkpointer.run().await; Ok(()) }); - let namespace_resolver = |path: &Path| { + let namespace_resolver = Arc::new(|path: &Path| { NamespaceName::from_string( path.parent() .unwrap() @@ -737,36 +814,72 @@ where ) .unwrap() .into() - }; - let wal = LibsqlWalManager::new(registry.clone(), Arc::new(namespace_resolver)); + }); - self.spawn_until_shutdown_with_teardown(join_set, pending(), async move { - registry.shutdown().await?; - Ok(()) + 
self.spawn_until_shutdown_with_teardown(join_set, pending(), { + let registry = registry.clone(); + async move { + registry.shutdown().await?; + Ok(()) + } }); - let make_wal_manager = Arc::new(move || EitherWAL::B(wal.clone())); - // let mut configurators = NamespaceConfigurators::empty(); - - // match client_config { - // Some(_) => todo!("configure replica"), - // // configure primary - // None => self.configure_primary_common( - // base_config, - // &mut configurators, - // make_wal_manager, - // migration_scheduler_handle, - // scripted_backup, - // ), - // } - - self.configurators_common( - base_config, - client_config, - make_wal_manager, - migration_scheduler_handle, - scripted_backup, - ) + let make_replication_svc = Box::new({ + let registry = registry.clone(); + let disable_namespaces = self.disable_namespaces; + move |store, user_auth, _, _| -> BoxReplicationService { + Box::new(LibsqlReplicationService::new( + registry, + store, + user_auth, + disable_namespaces, + )) + } + }); + let mut configurators = NamespaceConfigurators::empty(); + + match client_config { + // configure replica + Some((channel, uri)) => { + let replica_configurator = LibsqlReplicaConfigurator::new( + base_config, + registry.clone(), + uri, + channel, + namespace_resolver, + ); + configurators.with_replica(replica_configurator); + } + // configure primary + None => { + let primary_config = PrimaryConfig { + max_log_size: self.db_config.max_log_size, + max_log_duration: self.db_config.max_log_duration.map(Duration::from_secs_f32), + bottomless_replication: self.db_config.bottomless_replication.clone(), + scripted_backup, + checkpoint_interval: self.db_config.checkpoint_interval, + }; + let primary_configurator = LibsqlPrimaryConfigurator::new( + base_config.clone(), + primary_config.clone(), + registry.clone(), + namespace_resolver.clone(), + ); + + let schema_configurator = LibsqlSchemaConfigurator::new( + base_config, + primary_config, + migration_scheduler_handle, + registry, + 
namespace_resolver, + ); + + configurators.with_primary(primary_configurator); + configurators.with_schema(schema_configurator); + } + } + + Ok((configurators, make_replication_svc)) } #[cfg(feature = "durable-wal")] @@ -776,7 +889,7 @@ where client_config: Option<(Channel, Uri)>, migration_scheduler_handle: SchedulerHandle, scripted_backup: Option, - ) -> anyhow::Result { + ) -> anyhow::Result<(NamespaceConfigurators, MakeReplicationSvc)> { tracing::info!("using durable wal"); let lock_manager = Arc::new(std::sync::Mutex::new(LockManager::new())); let namespace_resolver = |path: &Path| { @@ -798,13 +911,28 @@ where self.storage_server_address.clone(), ); let make_wal_manager = Arc::new(move || EitherWAL::C(wal.clone())); - self.configurators_common( + let configurators = self.configurators_common( base_config, client_config, make_wal_manager, migration_scheduler_handle, scripted_backup, - ) + )?; + + let make_replication_svc = Box::new({ + let disable_namespaces = self.disable_namespaces; + move |store, client_auth, idle_shutdown, collect_stats| -> BoxReplicationService { + Box::new(ReplicationLogService::new( + store, + idle_shutdown, + client_auth, + disable_namespaces, + collect_stats, + )) + } + }); + + Ok((configurators, make_replication_svc)) } fn spawn_until_shutdown_on(&self, join_set: &mut JoinSet>, fut: F) @@ -841,15 +969,30 @@ where client_config: Option<(Channel, Uri)>, migration_scheduler_handle: SchedulerHandle, scripted_backup: Option, - ) -> anyhow::Result { + ) -> anyhow::Result<(NamespaceConfigurators, MakeReplicationSvc)> { let make_wal_manager = Arc::new(|| EitherWAL::A(Sqlite3WalManager::default())); - self.configurators_common( + let configurators = self.configurators_common( base_config, client_config, make_wal_manager, migration_scheduler_handle, scripted_backup, - ) + )?; + + let make_replication_svc = Box::new({ + let disable_namespaces = self.disable_namespaces; + move |store, client_auth, idle_shutdown, collect_stats| -> 
BoxReplicationService { + Box::new(ReplicationLogService::new( + store, + idle_shutdown, + client_auth, + disable_namespaces, + collect_stats, + )) + } + }); + + Ok((configurators, make_replication_svc)) } fn configurators_common( @@ -889,7 +1032,7 @@ where migration_scheduler_handle: SchedulerHandle, scripted_backup: Option, ) { - let primary_config = PrimaryExtraConfig { + let primary_config = PrimaryConfig { max_log_size: self.db_config.max_log_size, max_log_duration: self.db_config.max_log_duration.map(Duration::from_secs_f32), bottomless_replication: self.db_config.bottomless_replication.clone(), @@ -968,7 +1111,7 @@ where // match self.use_custom_wal { // Some(CustomWAL::LibsqlWal) => { // let (sender, receiver) = tokio::sync::mpsc::channel(64); - // let registry = Arc::new(WalRegistry::new(wal_path, NoStorage, sender)?); + // let registry = Arc::new(WalRegistry::new(wal_path, SqldStorage, sender)?); // let checkpointer = LibsqlCheckpointer::new(registry.clone(), receiver, 8); // join_set.spawn(async move { // checkpointer.run().await; diff --git a/libsql-server/src/main.rs b/libsql-server/src/main.rs index 6fce78a06a..edd448e5b9 100644 --- a/libsql-server/src/main.rs +++ b/libsql-server/src/main.rs @@ -651,6 +651,7 @@ async fn build_server(config: &Cli) -> anyhow::Result { .unwrap_or(Duration::from_secs(30)), use_custom_wal: config.use_custom_wal, storage_server_address: config.storage_server_address.clone(), + connector: Some(HttpConnector::new()), }) } diff --git a/libsql-server/src/namespace/configurator/fork.rs b/libsql-server/src/namespace/configurator/fork.rs index 03f2ac03d8..4b3e58ee85 100644 --- a/libsql-server/src/namespace/configurator/fork.rs +++ b/libsql-server/src/namespace/configurator/fork.rs @@ -20,7 +20,7 @@ use crate::replication::{LogReadError, ReplicationLogger}; use crate::{BLOCKING_RT, LIBSQL_PAGE_SIZE}; use super::helpers::make_bottomless_options; -use super::{NamespaceName, NamespaceStore, PrimaryExtraConfig, RestoreOption}; +use 
super::{NamespaceName, NamespaceStore, PrimaryConfig, RestoreOption}; type Result = crate::Result; @@ -31,7 +31,7 @@ pub(super) async fn fork( to_config: MetaStoreHandle, timestamp: Option, store: NamespaceStore, - primary_config: &PrimaryExtraConfig, + primary_config: &PrimaryConfig, base_path: Arc, ) -> crate::Result { let from_config = from_config.get(); @@ -55,7 +55,7 @@ pub(super) async fn fork( let logger = match &from_ns.db { Database::Primary(db) => db.wal_wrapper.wrapper().logger(), - Database::Schema(db) => db.wal_wrapper.wrapper().logger(), + Database::Schema(db) => db.wal_wrapper.as_ref().unwrap().wrapper().logger(), _ => { return Err(crate::Error::Fork(ForkError::Internal(anyhow::Error::msg( "Invalid source database type for fork", diff --git a/libsql-server/src/namespace/configurator/helpers.rs b/libsql-server/src/namespace/configurator/helpers.rs index 355b1b1472..a1102d0ea8 100644 --- a/libsql-server/src/namespace/configurator/helpers.rs +++ b/libsql-server/src/namespace/configurator/helpers.rs @@ -17,7 +17,7 @@ use tokio_util::io::StreamReader; use crate::connection::config::DatabaseConfig; use crate::connection::connection_manager::InnerWalManager; -use crate::connection::libsql::{open_conn, MakeLibSqlConn}; +use crate::connection::legacy::{open_conn, MakeLegacyConnection}; use crate::connection::{Connection as _, MakeConnection as _}; use crate::database::{PrimaryConnection, PrimaryConnectionMaker}; use crate::error::LoadDumpError; @@ -32,14 +32,14 @@ use crate::replication::{FrameNo, ReplicationLogger}; use crate::stats::Stats; use crate::{StatsSender, BLOCKING_RT, DB_CREATE_TIMEOUT, DEFAULT_AUTO_CHECKPOINT}; -use super::{BaseNamespaceConfig, PrimaryExtraConfig}; +use super::{BaseNamespaceConfig, PrimaryConfig}; const WASM_TABLE_CREATE: &str = "CREATE TABLE libsql_wasm_func_table (name text PRIMARY KEY, body text) WITHOUT ROWID;"; #[tracing::instrument(skip_all)] pub(super) async fn make_primary_connection_maker( - primary_config: 
&PrimaryExtraConfig, + primary_config: &PrimaryConfig, base_config: &BaseNamespaceConfig, meta_store_handle: &MetaStoreHandle, db_path: &Path, @@ -125,7 +125,7 @@ pub(super) async fn make_primary_connection_maker( tracing::debug!("Opening libsql connection"); - let connection_maker = MakeLibSqlConn::new( + let connection_maker = MakeLegacyConnection::new( db_path.to_path_buf(), wal_wrapper.clone(), stats.clone(), @@ -421,7 +421,7 @@ async fn run_storage_monitor( pub(super) async fn cleanup_primary( base: &BaseNamespaceConfig, - primary_config: &PrimaryExtraConfig, + primary_config: &PrimaryConfig, namespace: &NamespaceName, db_config: &DatabaseConfig, prune_all: bool, diff --git a/libsql-server/src/namespace/configurator/libsql_primary.rs b/libsql-server/src/namespace/configurator/libsql_primary.rs new file mode 100644 index 0000000000..3966c969a9 --- /dev/null +++ b/libsql-server/src/namespace/configurator/libsql_primary.rs @@ -0,0 +1,241 @@ +use std::path::Path; +use std::pin::Pin; +use std::sync::atomic::{AtomicBool, Ordering}; +use std::sync::Arc; + +use futures::prelude::Future; +use libsql_sys::name::NamespaceResolver; +use libsql_wal::io::StdIO; +use libsql_wal::registry::WalRegistry; +use libsql_wal::wal::LibsqlWalManager; +use tokio::task::JoinSet; + +use crate::connection::config::DatabaseConfig; +use crate::connection::libsql::{MakeLibsqlConnection, MakeLibsqlConnectionInner}; +use crate::connection::{Connection as _, MakeConnection}; +use crate::database::{Database, LibsqlPrimaryConnectionMaker, LibsqlPrimaryDatabase}; +use crate::namespace::broadcasters::BroadcasterHandle; +use crate::namespace::configurator::helpers::make_stats; +use crate::namespace::meta_store::MetaStoreHandle; +use crate::namespace::{ + Namespace, NamespaceBottomlessDbIdInit, NamespaceName, NamespaceStore, ResetCb, + ResolveNamespacePathFn, RestoreOption, +}; +use crate::stats::Stats; +use crate::{SqldStorage, DB_CREATE_TIMEOUT, DEFAULT_AUTO_CHECKPOINT}; +use 
crate::schema::{has_pending_migration_task, setup_migration_table}; + +use super::{BaseNamespaceConfig, ConfigureNamespace, PrimaryConfig}; + +pub struct LibsqlPrimaryConfigurator { + base: BaseNamespaceConfig, + primary_config: PrimaryConfig, + registry: Arc>, + namespace_resolver: Arc, +} + +pub struct LibsqlPrimaryCommon { + pub stats: Arc, + pub connection_maker: Arc, + pub join_set: JoinSet>, + pub block_writes: Arc, +} + +pub(super) async fn libsql_primary_common( + db_path: Arc, + db_config: MetaStoreHandle, + base_config: &BaseNamespaceConfig, + primary_config: &PrimaryConfig, + namespace: NamespaceName, + broadcaster: BroadcasterHandle, + resolve_attach_path: ResolveNamespacePathFn, + registry: Arc>, + namespace_resolver: Arc, +) -> crate::Result { + let mut join_set = JoinSet::new(); + + tokio::fs::create_dir_all(&db_path).await?; + + + tracing::debug!("Done making new primary"); + let (_snd, rcv) = tokio::sync::watch::channel(None); + let stats = make_stats( + &db_path, + &mut join_set, + db_config.clone(), + base_config.stats_sender.clone(), + namespace.clone(), + rcv.clone(), + base_config.encryption_config.clone(), + ) + .await?; + + let auto_checkpoint = if primary_config.checkpoint_interval.is_some() { + 0 + } else { + DEFAULT_AUTO_CHECKPOINT + }; + let block_writes = Arc::new(AtomicBool::new(false)); + let connection_maker = MakeLibsqlConnection { + inner: Arc::new(MakeLibsqlConnectionInner { + db_path: db_path.into(), + stats: stats.clone(), + broadcaster, + config_store: db_config.clone(), + extensions: base_config.extensions.clone(), + max_response_size: base_config.max_response_size, + max_total_response_size: base_config.max_total_response_size, + auto_checkpoint, + current_frame_no_receiver: rcv.clone(), + encryption_config: base_config.encryption_config.clone(), + block_writes: block_writes.clone(), + resolve_attach_path, + wal_manager: LibsqlWalManager::new(registry.clone(), namespace_resolver.clone()), + }) + } + .throttled( + 
base_config.max_concurrent_connections.clone(), + Some(DB_CREATE_TIMEOUT), + base_config.max_total_response_size, + base_config.max_concurrent_requests, + ); + let connection_maker = Arc::new(connection_maker); + + if db_config.get().shared_schema_name.is_some() { + let block_writes = block_writes.clone(); + let conn = connection_maker.create().await?; + tokio::task::spawn_blocking(move || { + conn.with_raw(|conn| -> crate::Result<()> { + setup_migration_table(conn)?; + if has_pending_migration_task(conn)? { + block_writes.store(true, Ordering::SeqCst); + } + Ok(()) + }) + }) + .await + .unwrap()?; + } + + Ok(LibsqlPrimaryCommon { + stats, + connection_maker, + join_set, + block_writes, + }) +} + +impl LibsqlPrimaryConfigurator { + pub fn new( + base: BaseNamespaceConfig, + primary_config: PrimaryConfig, + registry: Arc>, + namespace_resolver: Arc, + ) -> Self { + Self { + base, + primary_config, + registry, + namespace_resolver + } + } + + #[tracing::instrument(skip_all, fields(namespace))] + async fn try_new_primary( + &self, + namespace: NamespaceName, + db_config: MetaStoreHandle, + _restore_option: RestoreOption, + resolve_attach_path: ResolveNamespacePathFn, + db_path: Arc, + broadcaster: BroadcasterHandle, + ) -> crate::Result { + let common = libsql_primary_common( + db_path.clone(), + db_config.clone(), + &self.base, + &self.primary_config, + namespace.clone(), + broadcaster, + resolve_attach_path, + self.registry.clone(), + self.namespace_resolver.clone() + ).await?; + + Ok(Namespace { + tasks: common.join_set, + db: Database::LibsqlPrimary(LibsqlPrimaryDatabase { + connection_maker: common.connection_maker, + block_writes: common.block_writes, + }), + name: namespace, + stats: common.stats, + db_config_store: db_config, + path: db_path.into(), + }) + } +} + +impl ConfigureNamespace for LibsqlPrimaryConfigurator { + fn setup<'a>( + &'a self, + meta_store_handle: MetaStoreHandle, + restore_option: RestoreOption, + name: &'a NamespaceName, + _reset: 
ResetCb, + resolve_attach_path: ResolveNamespacePathFn, + _store: NamespaceStore, + broadcaster: BroadcasterHandle, + ) -> Pin> + Send + 'a>> { + Box::pin(async move { + let db_path: Arc = self.base.base_path.join("dbs").join(name.as_str()).into(); + let fresh_namespace = !db_path.try_exists()?; + // FIXME: make that truly atomic. explore the idea of using temp directories, and it's implications + match self + .try_new_primary( + name.clone(), + meta_store_handle, + restore_option, + resolve_attach_path, + db_path.clone(), + broadcaster, + ) + .await + { + Ok(this) => Ok(this), + Err(e) if fresh_namespace => { + tracing::error!( + "an error occured while deleting creating namespace, cleaning..." + ); + if let Err(e) = tokio::fs::remove_dir_all(&db_path).await { + tracing::error!("failed to remove dirty namespace directory: {e}") + } + Err(e) + } + Err(e) => Err(e), + } + }) + } + + fn cleanup<'a>( + &'a self, + _namespace: &'a NamespaceName, + _db_config: &'a DatabaseConfig, + _prune_all: bool, + _bottomless_db_id_init: NamespaceBottomlessDbIdInit, + ) -> Pin> + Send + 'a>> { + unimplemented!() + } + + fn fork<'a>( + &'a self, + _from_ns: &'a Namespace, + _from_config: MetaStoreHandle, + _to_ns: NamespaceName, + _to_config: MetaStoreHandle, + _timestamp: Option, + _store: NamespaceStore, + ) -> Pin> + Send + 'a>> { + unimplemented!() + } +} diff --git a/libsql-server/src/namespace/configurator/libsql_replica.rs b/libsql-server/src/namespace/configurator/libsql_replica.rs new file mode 100644 index 0000000000..d641cb36fd --- /dev/null +++ b/libsql-server/src/namespace/configurator/libsql_replica.rs @@ -0,0 +1,281 @@ +use std::future::Future; +use std::pin::Pin; +use std::sync::Arc; + +use hyper::Uri; +use libsql_replication::injector::LibsqlInjector; +use libsql_replication::replicator::Replicator; +use libsql_replication::rpc::replication::log_offset::WalFlavor; +use libsql_replication::rpc::replication::replication_log_client::ReplicationLogClient; +use 
libsql_sys::name::NamespaceResolver; +use libsql_wal::io::StdIO; +use libsql_wal::registry::WalRegistry; +use libsql_wal::replication::injector::Injector; +use libsql_wal::transaction::Transaction; +use libsql_wal::wal::LibsqlWalManager; +use tokio::task::JoinSet; +use tonic::transport::Channel; + +use crate::connection::config::DatabaseConfig; +use crate::connection::libsql::{MakeLibsqlConnection, MakeLibsqlConnectionInner}; +use crate::connection::write_proxy::MakeWriteProxyConn; +use crate::connection::MakeConnection; +use crate::database::{Database, LibsqlReplicaDatabase}; +use crate::namespace::broadcasters::BroadcasterHandle; +use crate::namespace::configurator::helpers::make_stats; +use crate::namespace::meta_store::MetaStoreHandle; +use crate::namespace::{ + Namespace, NamespaceBottomlessDbIdInit, NamespaceName, NamespaceStore, ResetCb, ResetOp, + ResolveNamespacePathFn, RestoreOption, +}; +use crate::{SqldStorage, DB_CREATE_TIMEOUT}; + +use super::{BaseNamespaceConfig, ConfigureNamespace}; + +pub struct LibsqlReplicaConfigurator { + base: BaseNamespaceConfig, + registry: Arc>, + uri: Uri, + channel: Channel, + namespace_resolver: Arc, +} + +impl LibsqlReplicaConfigurator { + pub fn new( + base: BaseNamespaceConfig, + registry: Arc>, + uri: Uri, + channel: Channel, + namespace_resolver: Arc, + ) -> Self { + Self { + base, + registry, + uri, + channel, + namespace_resolver, + } + } +} + +impl ConfigureNamespace for LibsqlReplicaConfigurator { + fn setup<'a>( + &'a self, + db_config: MetaStoreHandle, + restore_option: RestoreOption, + name: &'a NamespaceName, + reset: ResetCb, + resolve_attach_path: ResolveNamespacePathFn, + store: NamespaceStore, + broadcaster: BroadcasterHandle, + ) -> Pin> + Send + 'a>> { + Box::pin(async move { + tracing::debug!("creating replica namespace"); + let mut join_set = JoinSet::new(); + let db_path = self.base.base_path.join("dbs").join(name.as_str()); + let channel = self.channel.clone(); + let uri = self.uri.clone(); + let 
rpc_client = ReplicationLogClient::with_origin(channel.clone(), uri.clone()); + let client = crate::replication::replicator_client::Client::new( + name.clone(), + rpc_client, + &db_path, + db_config.clone(), + store.clone(), + WalFlavor::Libsql, + ) + .await?; + let applied_frame_no_receiver = client.current_frame_no_notifier.subscribe(); + let stats = make_stats( + &db_path, + &mut join_set, + db_config.clone(), + self.base.stats_sender.clone(), + name.clone(), + applied_frame_no_receiver.clone(), + self.base.encryption_config.clone(), + ) + .await?; + + let connection_maker = MakeLibsqlConnection { + inner: Arc::new(MakeLibsqlConnectionInner { + db_path: db_path.clone().into(), + stats: stats.clone(), + broadcaster: broadcaster.clone(), + config_store: db_config.clone(), + extensions: self.base.extensions.clone(), + max_response_size: self.base.max_response_size, + max_total_response_size: self.base.max_total_response_size, + auto_checkpoint: 0, + current_frame_no_receiver: applied_frame_no_receiver.clone(), + encryption_config: self.base.encryption_config.clone(), + block_writes: Arc::new(true.into()), + resolve_attach_path: resolve_attach_path.clone(), + wal_manager: LibsqlWalManager::new( + self.registry.clone(), + self.namespace_resolver.clone(), + ), + }), + }; + + let connection_maker = MakeWriteProxyConn::new( + channel.clone(), + uri.clone(), + stats.clone(), + applied_frame_no_receiver.clone(), + self.base.max_response_size, + self.base.max_total_response_size, + // FIXME: we need to fetch the primary index before + None, + self.base.encryption_config.clone(), + connection_maker, + ) + .throttled( + self.base.max_concurrent_connections.clone(), + Some(DB_CREATE_TIMEOUT), + self.base.max_total_response_size, + self.base.max_concurrent_requests, + ); + + // FIXME: hack, this is necessary for the registry to open the SharedWal + let _ = connection_maker.create().await?; + let shared = self + .registry + .get_async(&(name.clone().into())) + .await + 
.unwrap(); + + let mut tx = Transaction::Read(shared.begin_read(u64::MAX)); + shared.upgrade(&mut tx).unwrap(); + let guard = tx + .into_write() + .unwrap_or_else(|_| panic!()) + .into_lock_owned(); + let injector = Injector::new(shared, guard, 10).unwrap(); + let injector = LibsqlInjector::new(injector); + let mut replicator = Replicator::new(client, injector); + + tracing::debug!("try perform handshake"); + // force a handshake now, to retrieve the primary's current replication index + match replicator.try_perform_handshake().await { + Err(libsql_replication::replicator::Error::Meta( + libsql_replication::meta::Error::LogIncompatible, + )) => { + tracing::error!( + "trying to replicate incompatible logs, reseting replica and nuking db dir" + ); + std::fs::remove_dir_all(&db_path).unwrap(); + return self + .setup( + db_config, + restore_option, + name, + reset, + resolve_attach_path, + store, + broadcaster, + ) + .await; + } + Err(e) => Err(e)?, + Ok(_) => (), + } + + tracing::debug!("done performing handshake"); + + + let namespace = name.clone(); + join_set.spawn(async move { + use libsql_replication::replicator::Error; + loop { + match replicator.run().await { + err @ Error::Fatal(_) => Err(err)?, + err @ Error::NamespaceDoesntExist => { + tracing::error!("namespace {namespace} doesn't exist, destroying..."); + (reset)(ResetOp::Destroy(namespace.clone())); + Err(err)?; + } + e @ Error::Injector(_) => { + tracing::error!("potential corruption detected while replicating, reseting replica: {e}"); + (reset)(ResetOp::Reset(namespace.clone())); + Err(e)?; + }, + Error::Meta(err) => { + use libsql_replication::meta::Error; + match err { + Error::LogIncompatible => { + tracing::error!("trying to replicate incompatible logs, reseting replica"); + (reset)(ResetOp::Reset(namespace.clone())); + Err(err)?; + } + Error::InvalidMetaFile + | Error::Io(_) + | Error::InvalidLogId + | Error::FailedToCommit(_) + | Error::InvalidReplicationPath + | Error::RequiresCleanDatabase => { 
+ // We retry from last frame index? + tracing::warn!("non-fatal replication error, retrying from last commit index: {err}"); + }, + } + } + e @ (Error::Internal(_) + | Error::Client(_) + | Error::PrimaryHandshakeTimeout + | Error::NeedSnapshot) => { + tracing::warn!("non-fatal replication error, retrying from last commit index: {e}"); + }, + Error::NoHandshake => { + // not strictly necessary, but in case the handshake error goes uncaught, + // we reset the client state. + replicator.client_mut().reset_token(); + } + Error::SnapshotPending => unreachable!(), + } + } + }); + + Ok(Namespace { + tasks: join_set, + db: Database::LibsqlReplica(LibsqlReplicaDatabase { + connection_maker: Arc::new(connection_maker), + }), + name: name.clone(), + stats, + db_config_store: db_config, + path: db_path.into(), + }) + }) + } + + fn cleanup<'a>( + &'a self, + namespace: &'a NamespaceName, + _db_config: &DatabaseConfig, + _prune_all: bool, + _bottomless_db_id_init: NamespaceBottomlessDbIdInit, + ) -> Pin> + Send + 'a>> { + Box::pin(async move { + let ns_path = self.base.base_path.join("dbs").join(namespace.as_str()); + if ns_path.try_exists()? 
{ + tracing::debug!("removing database directory: {}", ns_path.display()); + tokio::fs::remove_dir_all(ns_path).await?; + } + Ok(()) + }) + } + + fn fork<'a>( + &'a self, + _from_ns: &'a Namespace, + _from_config: MetaStoreHandle, + _to_ns: NamespaceName, + _to_config: MetaStoreHandle, + _timestamp: Option, + _store: NamespaceStore, + ) -> Pin> + Send + 'a>> { + Box::pin(std::future::ready(Err(crate::Error::Fork( + super::fork::ForkError::ForkReplica, + )))) + } +} diff --git a/libsql-server/src/namespace/configurator/libsql_schema.rs b/libsql-server/src/namespace/configurator/libsql_schema.rs new file mode 100644 index 0000000000..fc30a86885 --- /dev/null +++ b/libsql-server/src/namespace/configurator/libsql_schema.rs @@ -0,0 +1,168 @@ +use std::path::Path; +use std::sync::Arc; + +use futures::prelude::Future; +use libsql_sys::name::NamespaceResolver; +use libsql_wal::io::StdIO; +use libsql_wal::registry::WalRegistry; + +use crate::connection::config::DatabaseConfig; +use crate::database::{Database, SchemaDatabase}; +use crate::namespace::broadcasters::BroadcasterHandle; +use crate::namespace::meta_store::MetaStoreHandle; +use crate::namespace::{ + Namespace, NamespaceName, NamespaceStore, ResetCb, ResolveNamespacePathFn, RestoreOption, +}; +use crate::schema::SchedulerHandle; +use crate::SqldStorage; + +use super::helpers::cleanup_primary; +use super::libsql_primary::libsql_primary_common; +use super::{BaseNamespaceConfig, ConfigureNamespace, PrimaryConfig}; + +pub struct LibsqlSchemaConfigurator { + base: BaseNamespaceConfig, + primary_config: PrimaryConfig, + migration_scheduler: SchedulerHandle, + registry: Arc>, + namespace_resolver: Arc, +} + +impl LibsqlSchemaConfigurator { + pub fn new( + base: BaseNamespaceConfig, + primary_config: PrimaryConfig, + migration_scheduler: SchedulerHandle, + registry: Arc>, + namespace_resolver: Arc, + ) -> Self { + Self { + base, + primary_config, + migration_scheduler, + registry, + namespace_resolver, + } + } + + 
#[tracing::instrument(skip_all, fields(namespace))] + async fn try_new_schema( + &self, + namespace: NamespaceName, + db_config: MetaStoreHandle, + _restore_option: RestoreOption, + resolve_attach_path: ResolveNamespacePathFn, + db_path: Arc, + broadcaster: BroadcasterHandle, + ) -> crate::Result { + let common = libsql_primary_common( + db_path.clone(), + db_config.clone(), + &self.base, + &self.primary_config, + namespace.clone(), + broadcaster, + resolve_attach_path, + self.registry.clone(), + self.namespace_resolver.clone() + ).await?; + + Ok(Namespace { + tasks: common.join_set, + db: Database::LibsqlSchema(SchemaDatabase::new( + self.migration_scheduler.clone(), + namespace.clone(), + common.connection_maker, + None, + db_config.clone(), + )), + name: namespace, + stats: common.stats, + db_config_store: db_config, + path: db_path.into(), + }) + } +} + +impl ConfigureNamespace for LibsqlSchemaConfigurator { + fn setup<'a>( + &'a self, + db_config: MetaStoreHandle, + restore_option: RestoreOption, + name: &'a NamespaceName, + _reset: ResetCb, + resolve_attach_path: ResolveNamespacePathFn, + _store: NamespaceStore, + broadcaster: BroadcasterHandle, + ) -> std::pin::Pin> + Send + 'a>> { + Box::pin(async move { + let db_path: Arc = self.base.base_path.join("dbs").join(name.as_str()).into(); + let fresh_namespace = !db_path.try_exists()?; + // FIXME: make that truly atomic. explore the idea of using temp directories, and it's implications + match self + .try_new_schema( + name.clone(), + db_config, + restore_option, + resolve_attach_path, + db_path.clone(), + broadcaster, + ) + .await + { + Ok(this) => Ok(this), + Err(e) if fresh_namespace => { + tracing::error!( + "an error occured while deleting creating namespace, cleaning..." 
+ ); + if let Err(e) = tokio::fs::remove_dir_all(&db_path).await { + tracing::error!("failed to remove dirty namespace directory: {e}") + } + Err(e) + } + Err(e) => Err(e), + } + }) + } + + fn cleanup<'a>( + &'a self, + namespace: &'a NamespaceName, + db_config: &'a DatabaseConfig, + prune_all: bool, + bottomless_db_id_init: crate::namespace::NamespaceBottomlessDbIdInit, + ) -> std::pin::Pin> + Send + 'a>> { + Box::pin(async move { + cleanup_primary( + &self.base, + &self.primary_config, + namespace, + db_config, + prune_all, + bottomless_db_id_init, + ) + .await + }) + } + + fn fork<'a>( + &'a self, + from_ns: &'a Namespace, + from_config: MetaStoreHandle, + to_ns: NamespaceName, + to_config: MetaStoreHandle, + timestamp: Option, + store: NamespaceStore, + ) -> std::pin::Pin> + Send + 'a>> { + Box::pin(super::fork::fork( + from_ns, + from_config, + to_ns, + to_config, + timestamp, + store, + &self.primary_config, + self.base.base_path.clone(), + )) + } +} diff --git a/libsql-server/src/namespace/configurator/libsql_wal_replica.rs b/libsql-server/src/namespace/configurator/libsql_wal_replica.rs deleted file mode 100644 index 6ab6cc52ef..0000000000 --- a/libsql-server/src/namespace/configurator/libsql_wal_replica.rs +++ /dev/null @@ -1,139 +0,0 @@ -use std::future::Future; -use std::pin::Pin; -use std::sync::Arc; - -use chrono::prelude::NaiveDateTime; -use hyper::Uri; -use libsql_replication::rpc::replication::replication_log_client::ReplicationLogClient; -use libsql_wal::io::StdIO; -use libsql_wal::registry::WalRegistry; -use libsql_wal::storage::NoStorage; -use tokio::task::JoinSet; -use tonic::transport::Channel; - -use crate::connection::config::DatabaseConfig; -use crate::connection::connection_manager::InnerWalManager; -use crate::connection::write_proxy::MakeWriteProxyConn; -use crate::connection::MakeConnection; -use crate::database::{Database, ReplicaDatabase}; -use crate::namespace::broadcasters::BroadcasterHandle; -use 
crate::namespace::configurator::helpers::make_stats; -use crate::namespace::meta_store::MetaStoreHandle; -use crate::namespace::{ - Namespace, NamespaceBottomlessDbIdInit, NamespaceName, NamespaceStore, ResetCb, - ResolveNamespacePathFn, RestoreOption, -}; -use crate::DEFAULT_AUTO_CHECKPOINT; - -use super::{BaseNamespaceConfig, ConfigureNamespace}; - -pub struct LibsqlWalReplicaConfigurator { - base: BaseNamespaceConfig, - registry: Arc>, - uri: Uri, - channel: Channel, - make_wal_manager: Arc InnerWalManager + Sync + Send + 'static>, -} - -impl ConfigureNamespace for LibsqlWalReplicaConfigurator { - fn setup<'a>( - &'a self, - db_config: MetaStoreHandle, - restore_option: RestoreOption, - name: &'a NamespaceName, - reset: ResetCb, - resolve_attach_path: ResolveNamespacePathFn, - store: NamespaceStore, - broadcaster: BroadcasterHandle, - ) -> Pin> + Send + 'a>> { - todo!() - // Box::pin(async move { - // tracing::debug!("creating replica namespace"); - // let db_path = self.base.base_path.join("dbs").join(name.as_str()); - // let channel = self.channel.clone(); - // let uri = self.uri.clone(); - // - // let rpc_client = ReplicationLogClient::with_origin(channel.clone(), uri.clone()); - // // TODO! setup replication - // - // let mut join_set = JoinSet::new(); - // let namespace = name.clone(); - // - // let stats = make_stats( - // &db_path, - // &mut join_set, - // db_config.clone(), - // self.base.stats_sender.clone(), - // name.clone(), - // applied_frame_no_receiver.clone(), - // ) - // .await?; - // - // let connection_maker = MakeWriteProxyConn::new( - // db_path.clone(), - // self.base.extensions.clone(), - // channel.clone(), - // uri.clone(), - // stats.clone(), - // broadcaster, - // db_config.clone(), - // applied_frame_no_receiver, - // self.base.max_response_size, - // self.base.max_total_response_size, - // primary_current_replication_index, - // None, - // resolve_attach_path, - // self.make_wal_manager.clone(), - // ) - // .await? 
- // .throttled( - // self.base.max_concurrent_connections.clone(), - // Some(DB_CREATE_TIMEOUT), - // self.base.max_total_response_size, - // self.base.max_concurrent_requests, - // ); - // - // Ok(Namespace { - // tasks: join_set, - // db: Database::Replica(ReplicaDatabase { - // connection_maker: Arc::new(connection_maker), - // }), - // name: name.clone(), - // stats, - // db_config_store: db_config, - // path: db_path.into(), - // }) - // }) - } - - fn cleanup<'a>( - &'a self, - namespace: &'a NamespaceName, - _db_config: &DatabaseConfig, - _prune_all: bool, - _bottomless_db_id_init: NamespaceBottomlessDbIdInit, - ) -> Pin> + Send + 'a>> { - Box::pin(async move { - let ns_path = self.base.base_path.join("dbs").join(namespace.as_str()); - if ns_path.try_exists()? { - tracing::debug!("removing database directory: {}", ns_path.display()); - tokio::fs::remove_dir_all(ns_path).await?; - } - Ok(()) - }) - } - - fn fork<'a>( - &'a self, - _from_ns: &'a Namespace, - _from_config: MetaStoreHandle, - _to_ns: NamespaceName, - _to_config: MetaStoreHandle, - _timestamp: Option, - _store: NamespaceStore, - ) -> Pin> + Send + 'a>> { - Box::pin(std::future::ready(Err(crate::Error::Fork( - super::fork::ForkError::ForkReplica, - )))) - } -} diff --git a/libsql-server/src/namespace/configurator/mod.rs b/libsql-server/src/namespace/configurator/mod.rs index b96d5a3824..2517a1113c 100644 --- a/libsql-server/src/namespace/configurator/mod.rs +++ b/libsql-server/src/namespace/configurator/mod.rs @@ -21,14 +21,19 @@ use super::{ pub mod fork; mod helpers; -// mod libsql_wal_replica; +mod libsql_replica; mod primary; mod replica; mod schema; +mod libsql_primary; +mod libsql_schema; pub use primary::PrimaryConfigurator; pub use replica::ReplicaConfigurator; pub use schema::SchemaConfigurator; +pub use libsql_primary::LibsqlPrimaryConfigurator; +pub use libsql_replica::LibsqlReplicaConfigurator; +pub use libsql_schema::LibsqlSchemaConfigurator; #[derive(Clone, Debug)] pub struct 
BaseNamespaceConfig { @@ -43,7 +48,7 @@ pub struct BaseNamespaceConfig { } #[derive(Clone)] -pub struct PrimaryExtraConfig { +pub struct PrimaryConfig { pub(crate) max_log_size: u64, pub(crate) max_log_duration: Option, pub(crate) bottomless_replication: Option, diff --git a/libsql-server/src/namespace/configurator/primary.rs b/libsql-server/src/namespace/configurator/primary.rs index 03cdd2fd7b..66570e9c27 100644 --- a/libsql-server/src/namespace/configurator/primary.rs +++ b/libsql-server/src/namespace/configurator/primary.rs @@ -9,7 +9,7 @@ use tokio::task::JoinSet; use crate::connection::config::DatabaseConfig; use crate::connection::connection_manager::InnerWalManager; -use crate::connection::MakeConnection; +use crate::connection::{Connection as _, MakeConnection}; use crate::database::{Database, PrimaryDatabase}; use crate::namespace::broadcasters::BroadcasterHandle; use crate::namespace::configurator::helpers::make_primary_connection_maker; @@ -22,18 +22,18 @@ use crate::run_periodic_checkpoint; use crate::schema::{has_pending_migration_task, setup_migration_table}; use super::helpers::cleanup_primary; -use super::{BaseNamespaceConfig, ConfigureNamespace, PrimaryExtraConfig}; +use super::{BaseNamespaceConfig, ConfigureNamespace, PrimaryConfig}; pub struct PrimaryConfigurator { base: BaseNamespaceConfig, - primary_config: PrimaryExtraConfig, + primary_config: PrimaryConfig, make_wal_manager: Arc InnerWalManager + Sync + Send + 'static>, } impl PrimaryConfigurator { pub fn new( base: BaseNamespaceConfig, - primary_config: PrimaryExtraConfig, + primary_config: PrimaryConfig, make_wal_manager: Arc InnerWalManager + Sync + Send + 'static>, ) -> Self { Self { diff --git a/libsql-server/src/namespace/configurator/replica.rs b/libsql-server/src/namespace/configurator/replica.rs index 7832d30ef8..97342fbe16 100644 --- a/libsql-server/src/namespace/configurator/replica.rs +++ b/libsql-server/src/namespace/configurator/replica.rs @@ -1,15 +1,18 @@ use std::pin::Pin; 
+use std::sync::atomic::AtomicBool; use std::sync::Arc; use futures::Future; use hyper::Uri; use libsql_replication::rpc::replication::log_offset::WalFlavor; use libsql_replication::rpc::replication::replication_log_client::ReplicationLogClient; +use libsql_sys::wal::wrapper::PassthroughWalWrapper; use tokio::task::JoinSet; use tonic::transport::Channel; use crate::connection::config::DatabaseConfig; use crate::connection::connection_manager::InnerWalManager; +use crate::connection::legacy::MakeLegacyConnection; use crate::connection::write_proxy::MakeWriteProxyConn; use crate::connection::MakeConnection; use crate::database::{Database, ReplicaDatabase}; @@ -175,23 +178,35 @@ impl ConfigureNamespace for ReplicaConfigurator { ) .await?; - let connection_maker = MakeWriteProxyConn::new( + let connection_maker = MakeLegacyConnection::new( db_path.clone(), + PassthroughWalWrapper, + stats.clone(), + broadcaster, + meta_store_handle.clone(), self.base.extensions.clone(), + self.base.max_response_size, + self.base.max_total_response_size, + DEFAULT_AUTO_CHECKPOINT, + applied_frame_no_receiver.clone(), + self.base.encryption_config.clone(), + Arc::new(AtomicBool::new(false)), // this is always false for write proxy + resolve_attach_path, + self.make_wal_manager.clone(), + ) + .await?; + + let connection_maker = MakeWriteProxyConn::new( channel.clone(), uri.clone(), stats.clone(), - broadcaster, - meta_store_handle.clone(), applied_frame_no_receiver, self.base.max_response_size, self.base.max_total_response_size, primary_current_replicatio_index, - None, - resolve_attach_path, - self.make_wal_manager.clone(), + self.base.encryption_config.clone(), + connection_maker, ) - .await? 
.throttled( self.base.max_concurrent_connections.clone(), Some(DB_CREATE_TIMEOUT), diff --git a/libsql-server/src/namespace/configurator/schema.rs b/libsql-server/src/namespace/configurator/schema.rs index f95c8abf51..add6d0e918 100644 --- a/libsql-server/src/namespace/configurator/schema.rs +++ b/libsql-server/src/namespace/configurator/schema.rs @@ -14,11 +14,11 @@ use crate::namespace::{ use crate::schema::SchedulerHandle; use super::helpers::{cleanup_primary, make_primary_connection_maker}; -use super::{BaseNamespaceConfig, ConfigureNamespace, PrimaryExtraConfig}; +use super::{BaseNamespaceConfig, ConfigureNamespace, PrimaryConfig}; pub struct SchemaConfigurator { base: BaseNamespaceConfig, - primary_config: PrimaryExtraConfig, + primary_config: PrimaryConfig, make_wal_manager: Arc InnerWalManager + Sync + Send + 'static>, migration_scheduler: SchedulerHandle, } @@ -26,7 +26,7 @@ pub struct SchemaConfigurator { impl SchemaConfigurator { pub fn new( base: BaseNamespaceConfig, - primary_config: PrimaryExtraConfig, + primary_config: PrimaryConfig, make_wal_manager: Arc InnerWalManager + Sync + Send + 'static>, migration_scheduler: SchedulerHandle, ) -> Self { @@ -76,8 +76,8 @@ impl ConfigureNamespace for SchemaConfigurator { db: Database::Schema(SchemaDatabase::new( self.migration_scheduler.clone(), name.clone(), - connection_maker, - wal_manager, + Arc::new(connection_maker), + Some(wal_manager), db_config.clone(), )), name: name.clone(), diff --git a/libsql-server/src/namespace/meta_store.rs b/libsql-server/src/namespace/meta_store.rs index 599dab9360..83f9c856ed 100644 --- a/libsql-server/src/namespace/meta_store.rs +++ b/libsql-server/src/namespace/meta_store.rs @@ -24,7 +24,7 @@ use crate::config::BottomlessConfig; use crate::connection::config::DatabaseConfig; use crate::schema::{MigrationDetails, MigrationSummary}; use crate::{ - config::MetaStoreConfig, connection::libsql::open_conn_active_checkpoint, error::Error, Result, + config::MetaStoreConfig, 
connection::legacy::open_conn_active_checkpoint, error::Error, Result, }; use super::NamespaceName; diff --git a/libsql-server/src/namespace/name.rs b/libsql-server/src/namespace/name.rs index e6335372a5..98fcc3d38b 100644 --- a/libsql-server/src/namespace/name.rs +++ b/libsql-server/src/namespace/name.rs @@ -57,6 +57,10 @@ impl NamespaceName { unsafe { std::str::from_utf8_unchecked(&self.0) } } + pub fn bytes(&self) -> Bytes { + self.0.clone() + } + pub fn from_bytes(bytes: Bytes) -> crate::Result { let s = std::str::from_utf8(&bytes).map_err(|_| Error::InvalidNamespace)?; Self::validate(s)?; diff --git a/libsql-server/src/query_result_builder.rs b/libsql-server/src/query_result_builder.rs index 9ba51a57d0..68dba3d478 100644 --- a/libsql-server/src/query_result_builder.rs +++ b/libsql-server/src/query_result_builder.rs @@ -525,7 +525,6 @@ impl QueryResultBuilder for Take { pub mod test { use std::fmt; - use crate::connection::program::Program; use arbitrary::{Arbitrary, Unstructured}; use itertools::Itertools; use rand::{ @@ -1037,14 +1036,14 @@ pub mod test { pub fn test_driver( iter: usize, - f: impl Fn(FsmQueryBuilder) -> crate::Result<(FsmQueryBuilder, Program)>, + f: impl Fn(FsmQueryBuilder) -> crate::Result, ) { for _ in 0..iter { // inject random errors let builder = FsmQueryBuilder::new(true); match f(builder) { Ok(b) => { - assert_eq!(b.0.state, Finish); + assert_eq!(b.state, Finish); } Err(e) => { assert!(matches!(e, crate::Error::BuilderError(_))); diff --git a/libsql-server/src/replication/primary/logger.rs b/libsql-server/src/replication/primary/logger.rs index 2b14db32a2..6213f0da50 100644 --- a/libsql-server/src/replication/primary/logger.rs +++ b/libsql-server/src/replication/primary/logger.rs @@ -882,7 +882,7 @@ mod test { use libsql_sys::wal::{Sqlite3WalManager, WalManager}; use super::*; - use crate::connection::libsql::open_conn; + use crate::connection::legacy::open_conn; use 
crate::replication::primary::replication_logger_wal::ReplicationLoggerWalWrapper; use crate::DEFAULT_AUTO_CHECKPOINT; diff --git a/libsql-server/src/replication/primary/replication_logger_wal.rs b/libsql-server/src/replication/primary/replication_logger_wal.rs index 6d7a268a81..defd8abf87 100644 --- a/libsql-server/src/replication/primary/replication_logger_wal.rs +++ b/libsql-server/src/replication/primary/replication_logger_wal.rs @@ -152,7 +152,7 @@ mod test { ); let wal_manager = ReplicationLoggerWalWrapper::new(logger.clone()); - let db = crate::connection::libsql::open_conn_active_checkpoint( + let db = crate::connection::legacy::open_conn_active_checkpoint( tmp.path(), Sqlite3WalManager::default().wrap(wal_manager), None, diff --git a/libsql-server/src/replication/snapshot_store.rs b/libsql-server/src/replication/snapshot_store.rs index e30d1659b1..21cec057e8 100644 --- a/libsql-server/src/replication/snapshot_store.rs +++ b/libsql-server/src/replication/snapshot_store.rs @@ -16,7 +16,7 @@ use tempfile::NamedTempFile; use uuid::Uuid; use zerocopy::{AsBytes, FromZeroes}; -use crate::connection::libsql::open_conn_active_checkpoint; +use crate::connection::legacy::open_conn_active_checkpoint; use crate::namespace::NamespaceName; use super::FrameNo; diff --git a/libsql-server/src/rpc/mod.rs b/libsql-server/src/rpc/mod.rs index 6359556518..5a15de5af2 100644 --- a/libsql-server/src/rpc/mod.rs +++ b/libsql-server/src/rpc/mod.rs @@ -1,7 +1,8 @@ use std::sync::Arc; use hyper_rustls::TlsAcceptor; -use libsql_replication::rpc::replication::NAMESPACE_METADATA_KEY; +use libsql_replication::rpc::replication::replication_log_server::ReplicationLogServer; +use libsql_replication::rpc::replication::{BoxReplicationService, NAMESPACE_METADATA_KEY}; use rustls::server::AllowAnyAuthenticatedClient; use rustls::RootCertStore; use tonic::Status; @@ -12,35 +13,23 @@ use tracing::Span; use crate::config::TlsConfig; use crate::metrics::CLIENT_VERSION; -use 
crate::namespace::{NamespaceName, NamespaceStore}; +use crate::namespace::NamespaceName; use crate::rpc::proxy::rpc::proxy_server::ProxyServer; use crate::rpc::proxy::ProxyService; -pub use crate::rpc::replication_log::rpc::replication_log_server::ReplicationLogServer; -use crate::rpc::replication_log::ReplicationLogService; use crate::utils::services::idle_shutdown::IdleShutdownKicker; pub mod proxy; pub mod replica_proxy; -pub mod replication_log; -pub mod replication_log_proxy; pub mod streaming_exec; +pub mod replication; pub async fn run_rpc_server( proxy_service: ProxyService, acceptor: A, maybe_tls: Option, idle_shutdown_layer: Option, - namespaces: NamespaceStore, - disable_namespaces: bool, + service: BoxReplicationService, ) -> anyhow::Result<()> { - let logger_service = ReplicationLogService::new( - namespaces.clone(), - idle_shutdown_layer.clone(), - None, - disable_namespaces, - false, - ); - if let Some(tls_config) = maybe_tls { let cert_pem = tokio::fs::read_to_string(&tls_config.cert).await?; let certs = rustls_pemfile::certs(&mut cert_pem.as_bytes())?; @@ -76,7 +65,7 @@ pub async fn run_rpc_server( let router = tonic::transport::Server::builder() .layer(&option_layer(idle_shutdown_layer)) .add_service(ProxyServer::new(proxy_service)) - .add_service(ReplicationLogServer::new(logger_service)) + .add_service(ReplicationLogServer::new(service)) .into_router(); let svc = ServiceBuilder::new() @@ -96,7 +85,7 @@ pub async fn run_rpc_server( hyper::server::Server::builder(acceptor).serve(h2c).await?; } else { let proxy = ProxyServer::new(proxy_service); - let replication = ReplicationLogServer::new(logger_service); + let replication = ReplicationLogServer::new(service); let router = tonic::transport::Server::builder() .layer(&option_layer(idle_shutdown_layer)) diff --git a/libsql-server/src/rpc/proxy.rs b/libsql-server/src/rpc/proxy.rs index 2899ab435c..80cbd37088 100644 --- a/libsql-server/src/rpc/proxy.rs +++ b/libsql-server/src/rpc/proxy.rs @@ -222,7 
+222,7 @@ pub mod rpc { impl From for Program { fn from(pgm: connection::program::Program) -> Self { Self { - steps: pgm.steps.into_iter().map(|s| s.into()).collect(), + steps: pgm.steps.iter().map(|s| s.clone().into()).collect(), } } } diff --git a/libsql-server/src/rpc/replication/auth.rs b/libsql-server/src/rpc/replication/auth.rs new file mode 100644 index 0000000000..5d451804a6 --- /dev/null +++ b/libsql-server/src/rpc/replication/auth.rs @@ -0,0 +1,39 @@ +use tonic::Status; + +use crate::auth::parsers::parse_grpc_auth_header; +use crate::auth::{Auth, Jwt}; +use crate::namespace::{NamespaceName, NamespaceStore}; + +pub async fn authenticate( + namespaces: &NamespaceStore, + req: &tonic::Request, + namespace: NamespaceName, + user_auth_strategy: &Option, +) -> Result<(), Status> { + // todo dupe #auth + let namespace_jwt_keys = namespaces.with(namespace.clone(), |ns| ns.jwt_keys()).await; + + let auth = match namespace_jwt_keys { + Ok(Ok(Some(key))) => Some(Auth::new(Jwt::new(key))), + Ok(Ok(None)) => user_auth_strategy.clone(), + Err(e) => match e.as_ref() { + crate::error::Error::NamespaceDoesntExist(_) => user_auth_strategy.clone(), + _ => Err(Status::internal(format!( + "Error fetching jwt key for a namespace: {}", + e + )))?, + }, + Ok(Err(e)) => Err(Status::internal(format!( + "Error fetching jwt key for a namespace: {}", + e + )))?, + }; + + if let Some(auth) = auth { + let context = parse_grpc_auth_header(req.metadata(), &auth.user_strategy.required_fields()) + .map_err(|e| tonic::Status::internal(format!("Error parsing auth header: {}", e)))?; + auth.authenticate(context)?; + } + + Ok(()) +} diff --git a/libsql-server/src/rpc/replication/libsql_replicator.rs b/libsql-server/src/rpc/replication/libsql_replicator.rs new file mode 100644 index 0000000000..6376b12962 --- /dev/null +++ b/libsql-server/src/rpc/replication/libsql_replicator.rs @@ -0,0 +1,217 @@ +use std::mem::size_of; +use std::pin::Pin; +use std::sync::Arc; +use std::task::{ready, Context, 
Poll}; + +use bytes::Bytes; +use futures::stream::BoxStream; +use libsql_replication::rpc::replication::log_offset::WalFlavor; +use libsql_replication::rpc::replication::replication_log_server::ReplicationLog; +use libsql_replication::rpc::replication::{ + Frame as RpcFrame, Frames, HelloRequest, HelloResponse, LogOffset, NAMESPACE_DOESNT_EXIST, +}; +use libsql_wal::io::StdIO; +use libsql_wal::registry::WalRegistry; +use libsql_wal::segment::Frame; +use md5::{Digest as _, Md5}; +use tokio_stream::Stream; +use tonic::Status; +use uuid::Uuid; + +use crate::auth::Auth; +use crate::namespace::{NamespaceName, NamespaceStore}; +use crate::SqldStorage; + +pub struct LibsqlReplicationService { + registry: Arc>, + store: NamespaceStore, + user_auth_strategy: Option, + disable_namespaces: bool, + session_token: Bytes, +} + +impl LibsqlReplicationService { + pub fn new( + registry: Arc>, + store: NamespaceStore, + user_auth_strategy: Option, + disable_namespaces: bool, + ) -> Self { + let session_token = Uuid::new_v4().to_string().into(); + Self { + registry, + disable_namespaces, + store, + user_auth_strategy, + session_token, + } + } + + async fn authenticate( + &self, + req: &tonic::Request, + namespace: NamespaceName, + ) -> Result<(), Status> { + super::auth::authenticate(&self.store, req, namespace, &self.user_auth_strategy).await + } + + fn encode_session_token(&self, version: usize) -> Uuid { + let mut sha = Md5::new(); + sha.update(&self.session_token[..]); + sha.update(version.to_le_bytes()); + + let num = sha.finalize(); + let num = u128::from_le_bytes(num.into()); + Uuid::from_u128(num) + } +} + +pin_project_lite::pin_project! 
{ + struct FrameStreamAdapter { + #[pin] + inner: S, + flavor: WalFlavor, + } +} + +impl FrameStreamAdapter { + fn new(inner: S, flavor: WalFlavor) -> Self { + Self { inner, flavor } + } +} + +impl Stream for FrameStreamAdapter +where + S: Stream, libsql_wal::replication::Error>>, +{ + type Item = Result; + + fn poll_next(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll> { + let this = self.project(); + match ready!(this.inner.poll_next(cx)) { + Some(Ok(f)) => { + match this.flavor { + WalFlavor::Libsql => { + // safety: frame implemements zerocopy traits, so it can safely be interpreted as a + // byte slize of the same size + let bytes: Box<[u8; size_of::()]> = unsafe { std::mem::transmute(f) }; + + let data = Bytes::from(bytes as Box<[u8]>); + Poll::Ready(Some(Ok(RpcFrame { + data, + timestamp: None, + }))) + } + WalFlavor::Sqlite => { + let header = libsql_replication::frame::FrameHeader { + frame_no: f.header().frame_no().into(), + checksum: 0.into(), + page_no: f.header().page_no().into(), + size_after: f.header().size_after().into(), + }; + + let frame = libsql_replication::frame::Frame::from_parts(&header, f.data()); + Poll::Ready(Some(Ok(RpcFrame { + data: frame.bytes(), + timestamp: None, + }))) + }, + } + } + Some(Err(_e)) => todo!(), + None => Poll::Ready(None), + } + } +} + +#[tonic::async_trait] +impl ReplicationLog for LibsqlReplicationService { + type LogEntriesStream = BoxStream<'static, Result>; + type SnapshotStream = BoxStream<'static, Result>; + + async fn log_entries( + &self, + req: tonic::Request, + ) -> Result, Status> { + let namespace = super::super::extract_namespace(self.disable_namespaces, &req)?; + self.authenticate(&req, namespace.clone()).await?; + let shared = self.registry.get_async(&namespace.into()).await.unwrap(); + let req = req.into_inner(); + // TODO: replicator should only accecpt NonZero + let replicator = libsql_wal::replication::replicator::Replicator::new( + shared, + req.next_offset.max(1), + ); + + let flavor = 
req.wal_flavor(); + let stream = FrameStreamAdapter::new(replicator.into_frame_stream(), flavor); + Ok(tonic::Response::new(Box::pin(stream))) + } + + async fn batch_log_entries( + &self, + _req: tonic::Request, + ) -> Result, Status> { + todo!() + // let namespace = super::super::extract_namespace(self.disable_namespaces, &req)?; + // self.authenticate(&req, namespace.clone()).await?; + // let shared = self.registry.get_async(&namespace.into()).await.unwrap(); + // let replicator = libsql_wal::replication::replicator::Replicator::new(shared, req.into_inner().next_offset); + // + // let frames = FrameStreamAdapter::new(replicator.into_frame_stream()) + // .take_while(|) + // .collect::, Status>>().await?; + // Ok(tonic::Response::new(Frames { frames })) + } + + async fn hello( + &self, + req: tonic::Request, + ) -> Result, Status> { + let namespace = super::super::extract_namespace(self.disable_namespaces, &req)?; + self.authenticate(&req, namespace.clone()).await?; + + let shared = self + .registry + .get_async(&namespace.clone().into()) + .await + .unwrap(); + let log_id = shared.log_id(); + let current_replication_index = shared.last_committed_frame_no(); + let (config, version) = self + .store + .with(namespace, |ns| -> Result<_, Status> { + let config = ns.config(); + let version = ns.config_version(); + Ok((config, version)) + }) + .await + .map_err(|e| { + if let crate::error::Error::NamespaceDoesntExist(_) = e.as_ref() { + Status::failed_precondition(NAMESPACE_DOESNT_EXIST) + } else { + Status::internal(e.to_string()) + } + })??; + + let session_hash = self.encode_session_token(version); + + let response = HelloResponse { + log_id: log_id.to_string(), + session_token: session_hash.to_string().into(), + generation_id: Uuid::from_u128(0).to_string(), + generation_start_index: 0, + current_replication_index: Some(current_replication_index), + config: Some(config.as_ref().into()), + }; + + Ok(tonic::Response::new(response)) + } + + async fn snapshot( + &self, + 
_req: tonic::Request, + ) -> Result, Status> { + Err(Status::unimplemented("no snapshot required with libsql wal")) + } +} diff --git a/libsql-server/src/rpc/replication/mod.rs b/libsql-server/src/rpc/replication/mod.rs new file mode 100644 index 0000000000..9058a40a91 --- /dev/null +++ b/libsql-server/src/rpc/replication/mod.rs @@ -0,0 +1,5 @@ +pub mod libsql_replicator; +pub mod replication_log; +pub mod replication_log_proxy; +mod auth; + diff --git a/libsql-server/src/rpc/replication_log.rs b/libsql-server/src/rpc/replication/replication_log.rs similarity index 89% rename from libsql-server/src/rpc/replication_log.rs rename to libsql-server/src/rpc/replication/replication_log.rs index 1ef306daf1..22c184392b 100644 --- a/libsql-server/src/rpc/replication_log.rs +++ b/libsql-server/src/rpc/replication/replication_log.rs @@ -20,8 +20,7 @@ use tonic::transport::server::TcpConnectInfo; use tonic::Status; use uuid::Uuid; -use crate::auth::Jwt; -use crate::auth::{parsers::parse_grpc_auth_header, Auth}; +use crate::auth::Auth; use crate::connection::config::DatabaseConfig; use crate::namespace::{NamespaceName, NamespaceStore}; use crate::replication::primary::frame_stream::FrameStream; @@ -29,7 +28,7 @@ use crate::replication::{LogReadError, ReplicationLogger}; use crate::stats::Stats; use crate::utils::services::idle_shutdown::IdleShutdownKicker; -use super::extract_namespace; +use crate::rpc::extract_namespace; pub struct ReplicationLogService { namespaces: NamespaceStore, @@ -72,38 +71,7 @@ impl ReplicationLogService { req: &tonic::Request, namespace: NamespaceName, ) -> Result<(), Status> { - // todo dupe #auth - let namespace_jwt_keys = self - .namespaces - .with(namespace.clone(), |ns| ns.jwt_keys()) - .await; - - let auth = match namespace_jwt_keys { - Ok(Ok(Some(key))) => Some(Auth::new(Jwt::new(key))), - Ok(Ok(None)) => self.user_auth_strategy.clone(), - Err(e) => match e.as_ref() { - crate::error::Error::NamespaceDoesntExist(_) => 
self.user_auth_strategy.clone(), - _ => Err(Status::internal(format!( - "Error fetching jwt key for a namespace: {}", - e - )))?, - }, - Ok(Err(e)) => Err(Status::internal(format!( - "Error fetching jwt key for a namespace: {}", - e - )))?, - }; - - if let Some(auth) = auth { - let context = - parse_grpc_auth_header(req.metadata(), &auth.user_strategy.required_fields()) - .map_err(|e| { - tonic::Status::internal(format!("Error parsing auth header: {}", e)) - })?; - auth.authenticate(context)?; - } - - Ok(()) + super::auth::authenticate(&self.namespaces, req, namespace, &self.user_auth_strategy).await } fn verify_session_token( @@ -263,7 +231,7 @@ impl ReplicationLog for ReplicationLogService { if let WalFlavor::Libsql = req.get_ref().wal_flavor() { return Err(Status::invalid_argument("libsql wal not supported")); } - let namespace = super::extract_namespace(self.disable_namespaces, &req)?; + let namespace = super::super::extract_namespace(self.disable_namespaces, &req)?; self.authenticate(&req, namespace.clone()).await?; @@ -311,7 +279,7 @@ impl ReplicationLog for ReplicationLogService { if let WalFlavor::Libsql = req.get_ref().wal_flavor() { return Err(Status::invalid_argument("libsql wal not supported")); } - let namespace = super::extract_namespace(self.disable_namespaces, &req)?; + let namespace = super::super::extract_namespace(self.disable_namespaces, &req)?; self.authenticate(&req, namespace.clone()).await?; let (logger, _, _, stats, _) = self.logger_from_namespace(namespace, &req, true).await?; @@ -346,7 +314,7 @@ impl ReplicationLog for ReplicationLogService { &self, req: tonic::Request, ) -> Result, Status> { - let namespace = super::extract_namespace(self.disable_namespaces, &req)?; + let namespace = super::super::extract_namespace(self.disable_namespaces, &req)?; self.authenticate(&req, namespace.clone()).await?; // legacy support @@ -385,9 +353,7 @@ impl ReplicationLog for ReplicationLogService { if let WalFlavor::Libsql = req.get_ref().wal_flavor() { 
return Err(Status::invalid_argument("libsql wal not supported")); } - - let namespace = super::extract_namespace(self.disable_namespaces, &req)?; - + let namespace = super::super::extract_namespace(self.disable_namespaces, &req)?; self.authenticate(&req, namespace.clone()).await?; let (logger, _, _, stats, _) = self.logger_from_namespace(namespace, &req, true).await?; diff --git a/libsql-server/src/rpc/replication_log_proxy.rs b/libsql-server/src/rpc/replication/replication_log_proxy.rs similarity index 100% rename from libsql-server/src/rpc/replication_log_proxy.rs rename to libsql-server/src/rpc/replication/replication_log_proxy.rs diff --git a/libsql-server/src/rpc/streaming_exec.rs b/libsql-server/src/rpc/streaming_exec.rs index 5214ace732..87a24b851a 100644 --- a/libsql-server/src/rpc/streaming_exec.rs +++ b/libsql-server/src/rpc/streaming_exec.rs @@ -367,7 +367,7 @@ pub mod test { use tokio_stream::wrappers::ReceiverStream; use crate::auth::Authenticated; - use crate::connection::libsql::LibSqlConnection; + use crate::connection::legacy::LegacyConnection; use crate::connection::program::Program; use crate::namespace::meta_store::{metastore_connection_maker, MetaStore}; use crate::namespace::NamespaceName; @@ -390,7 +390,7 @@ pub mod test { #[tokio::test] async fn invalid_request() { let tmp = tempdir().unwrap(); - let conn = LibSqlConnection::new_test(tmp.path()).await; + let conn = LegacyConnection::new_test(tmp.path()).await; let (snd, rcv) = mpsc::channel(1); let (maker, manager) = metastore_connection_maker(None, tmp.path()).await.unwrap(); let ctx = RequestContext::new( @@ -416,7 +416,7 @@ pub mod test { #[tokio::test] async fn request_stream_dropped() { let tmp = tempdir().unwrap(); - let conn = LibSqlConnection::new_test(tmp.path()).await; + let conn = LegacyConnection::new_test(tmp.path()).await; let (snd, rcv) = mpsc::channel(1); let (maker, manager) = metastore_connection_maker(None, tmp.path()).await.unwrap(); let ctx = RequestContext::new( @@ 
-438,7 +438,7 @@ pub mod test { #[tokio::test] async fn perform_query_simple() { let tmp = tempdir().unwrap(); - let conn = LibSqlConnection::new_test(tmp.path()).await; + let conn = LegacyConnection::new_test(tmp.path()).await; let (snd, rcv) = mpsc::channel(1); let (maker, manager) = metastore_connection_maker(None, tmp.path()).await.unwrap(); let ctx = RequestContext::new( @@ -462,7 +462,7 @@ pub mod test { #[tokio::test] async fn single_query_split_response() { let tmp = tempdir().unwrap(); - let conn = LibSqlConnection::new_test(tmp.path()).await; + let conn = LegacyConnection::new_test(tmp.path()).await; let (snd, rcv) = mpsc::channel(1); let (maker, manager) = metastore_connection_maker(None, tmp.path()).await.unwrap(); let ctx = RequestContext::new( @@ -519,7 +519,7 @@ pub mod test { #[tokio::test] async fn request_interupted() { let tmp = tempdir().unwrap(); - let conn = LibSqlConnection::new_test(tmp.path()).await; + let conn = LegacyConnection::new_test(tmp.path()).await; let (snd, rcv) = mpsc::channel(2); let (maker, manager) = metastore_connection_maker(None, tmp.path()).await.unwrap(); let ctx = RequestContext::new( @@ -546,7 +546,7 @@ pub mod test { #[tokio::test] async fn perform_multiple_queries() { let tmp = tempdir().unwrap(); - let conn = LibSqlConnection::new_test(tmp.path()).await; + let conn = LegacyConnection::new_test(tmp.path()).await; let (snd, rcv) = mpsc::channel(1); let (maker, manager) = metastore_connection_maker(None, tmp.path()).await.unwrap(); let ctx = RequestContext::new( @@ -573,7 +573,7 @@ pub mod test { #[tokio::test] async fn query_number_less_than_previous_query() { let tmp = tempdir().unwrap(); - let conn = LibSqlConnection::new_test(tmp.path()).await; + let conn = LegacyConnection::new_test(tmp.path()).await; let (snd, rcv) = mpsc::channel(1); let (maker, manager) = metastore_connection_maker(None, tmp.path()).await.unwrap(); let ctx = RequestContext::new( @@ -602,7 +602,7 @@ pub mod test { #[tokio::test] async fn 
describe() { let tmp = tempdir().unwrap(); - let conn = LibSqlConnection::new_test(tmp.path()).await; + let conn = LegacyConnection::new_test(tmp.path()).await; let (snd, rcv) = mpsc::channel(1); let (maker, manager) = metastore_connection_maker(None, tmp.path()).await.unwrap(); let ctx = RequestContext::new( diff --git a/libsql-server/src/schema/migration.rs b/libsql-server/src/schema/migration.rs index 73135db470..e0cfe52759 100644 --- a/libsql-server/src/schema/migration.rs +++ b/libsql-server/src/schema/migration.rs @@ -240,7 +240,7 @@ mod test { use libsql_sys::wal::Sqlite3WalManager; use tempfile::tempdir; - use crate::connection::libsql::open_conn_active_checkpoint; + use crate::connection::legacy::open_conn_active_checkpoint; use crate::namespace::NamespaceName; use crate::schema::status::MigrationTask; diff --git a/libsql-server/src/schema/mod.rs b/libsql-server/src/schema/mod.rs index d024586ee1..e7c7681262 100644 --- a/libsql-server/src/schema/mod.rs +++ b/libsql-server/src/schema/mod.rs @@ -60,7 +60,7 @@ pub fn validate_migration(migration: &mut Program) -> Result<(), Error> { ) { return Err(Error::MigrationContainsTransactionStatements); } - migration.steps[0].query = Query { + migration.steps_mut().unwrap()[0].query = Query { stmt: Statement::parse("PRAGMA max_page_count") .next() .unwrap() @@ -72,7 +72,7 @@ pub fn validate_migration(migration: &mut Program) -> Result<(), Error> { if !matches!(step.query.stmt.kind, StmtKind::TxnEnd) { break; } - migration.steps.pop(); + migration.steps_mut().unwrap().pop(); } } if migration.steps().iter().any(|s| s.query.stmt.kind.is_txn()) { diff --git a/libsql-server/src/schema/scheduler.rs b/libsql-server/src/schema/scheduler.rs index 01a3d795d8..d3ea6ddea7 100644 --- a/libsql-server/src/schema/scheduler.rs +++ b/libsql-server/src/schema/scheduler.rs @@ -10,7 +10,6 @@ use tokio::task::JoinSet; use crate::connection::program::Program; use crate::connection::{Connection, MakeConnection}; -use 
crate::database::PrimaryConnectionMaker; use crate::namespace::meta_store::{MetaStore, MetaStoreConnection}; use crate::namespace::{NamespaceName, NamespaceStore}; use crate::query_result_builder::{IgnoreResult, QueryBuilderConfig}; @@ -349,10 +348,8 @@ impl Scheduler { let (connection_maker, block_writes) = self.namespace_store .with(task.namespace(), move |ns| { - let db = ns.db.as_primary().expect( - "attempting to perform schema migration on non-primary database", - ); - (db.connection_maker().clone(), db.block_writes.clone()) + assert!(ns.db.is_primary(), "attempting to perform schema migration on non-primary database"); + (ns.db.connection_maker().clone(), ns.db.block_writes().unwrap()) }) .await .map_err(|e| Error::NamespaceLoad(Box::new(e)))?; @@ -426,7 +423,7 @@ async fn try_step_task( _permit: OwnedSemaphorePermit, namespace_store: NamespaceStore, migration_db: Arc>, - connection_maker: Arc, + connection_maker: Arc>, job_status: MigrationJobStatus, migration: Arc, mut task: MigrationTask, @@ -477,7 +474,7 @@ async fn try_step_task( async fn try_step_task_inner( namespace_store: NamespaceStore, - connection_maker: Arc, + connection_maker: Arc>, job_status: MigrationJobStatus, migration: Arc, task: &MigrationTask, @@ -739,11 +736,8 @@ async fn step_job_run_success( // TODO: check that all tasks actually reported success before migration let connection_maker = namespace_store .with(schema.clone(), |ns| { - ns.db - .as_schema() - .expect("expected database to be a schema database") - .connection_maker() - .clone() + assert!(ns.db.is_schema(), "expected database to be a schema database"); + ns.db.connection_maker() }) .await .map_err(|e| Error::NamespaceLoad(Box::new(e)))?; @@ -754,7 +748,6 @@ async fn step_job_run_success( .map_err(|e| Error::FailedToConnect(schema.clone(), e.into()))?; tokio::task::spawn_blocking(move || -> Result<(), Error> { connection - .connection() .with_raw(|conn| -> Result<(), Error> { let mut txn = conn.transaction()?; let 
schema_version = @@ -809,7 +802,7 @@ mod test { use crate::connection::config::DatabaseConfig; use crate::database::DatabaseKind; use crate::namespace::configurator::{ - BaseNamespaceConfig, NamespaceConfigurators, PrimaryConfigurator, PrimaryExtraConfig, + BaseNamespaceConfig, NamespaceConfigurators, PrimaryConfigurator, PrimaryConfig, SchemaConfigurator, }; use crate::namespace::meta_store::{metastore_connection_maker, MetaStore}; @@ -863,9 +856,10 @@ mod test { let (block_write, ns_conn_maker) = store .with("ns".into(), |ns| { + assert!(ns.db.is_primary()); ( - ns.db.as_primary().unwrap().block_writes.clone(), - ns.db.as_primary().unwrap().connection_maker(), + ns.db.block_writes().unwrap(), + ns.db.connection_maker(), ) }) .await @@ -920,7 +914,7 @@ mod test { encryption_config: None, }; - let primary_config = PrimaryExtraConfig { + let primary_config = PrimaryConfig { max_log_size: 1000000000, max_log_duration: None, bottomless_replication: None, @@ -989,9 +983,10 @@ mod test { let (block_write, ns_conn_maker) = store .with("ns".into(), |ns| { + assert!(ns.db.is_primary()); ( - ns.db.as_primary().unwrap().block_writes.clone(), - ns.db.as_primary().unwrap().connection_maker(), + ns.db.block_writes().unwrap(), + ns.db.connection_maker(), ) }) .await @@ -1040,12 +1035,13 @@ mod test { store .with("ns".into(), |ns| { + assert!(ns.db.is_primary()); assert!(ns .db - .as_primary() + .block_writes() .unwrap() - .block_writes - .load(std::sync::atomic::Ordering::Relaxed)); + .load(std::sync::atomic::Ordering::Relaxed) + ); }) .await .unwrap(); diff --git a/libsql-server/src/test/bottomless.rs b/libsql-server/src/test/bottomless.rs index 5f3015e11e..159dae0226 100644 --- a/libsql-server/src/test/bottomless.rs +++ b/libsql-server/src/test/bottomless.rs @@ -458,7 +458,6 @@ async fn remove_snapshots(bucket: &str) { if let Ok(out) = client.list_objects().bucket(bucket).send().await { let keys = out .contents() - .unwrap() .iter() .map(|o| { let key = o.key().unwrap(); @@ 
-466,7 +465,7 @@ async fn remove_snapshots(bucket: &str) { format!("{}/db.gz", prefix) }) .unique() - .map(|key| ObjectIdentifier::builder().key(key).build()) + .map(|key| ObjectIdentifier::builder().key(key).build().unwrap()) .collect(); client @@ -476,7 +475,7 @@ async fn remove_snapshots(bucket: &str) { Delete::builder() .set_objects(Some(keys)) .quiet(true) - .build(), + .build().unwrap(), ) .send() .await @@ -489,7 +488,7 @@ async fn remove_snapshots(bucket: &str) { async fn assert_bucket_occupancy(bucket: &str, expect_empty: bool) { let client = s3_client().await.unwrap(); if let Ok(out) = client.list_objects().bucket(bucket).send().await { - let contents = out.contents().unwrap_or_default(); + let contents = out.contents(); if expect_empty { assert!( contents.is_empty(), @@ -545,17 +544,17 @@ impl S3BucketCleaner { let client = s3_client().await?; let objects = client.list_objects().bucket(bucket).send().await?; let mut delete_keys = Vec::new(); - for o in objects.contents().unwrap_or_default() { + for o in objects.contents() { let id = ObjectIdentifier::builder() .set_key(o.key().map(String::from)) - .build(); + .build().unwrap(); delete_keys.push(id); } let _ = client .delete_objects() .bucket(bucket) - .delete(Delete::builder().set_objects(Some(delete_keys)).build()) + .delete(Delete::builder().set_objects(Some(delete_keys)).build().unwrap()) .send() .await?; diff --git a/libsql-wal/src/checkpointer.rs b/libsql-wal/src/checkpointer.rs index 1f389bc265..96a7b83bd7 100644 --- a/libsql-wal/src/checkpointer.rs +++ b/libsql-wal/src/checkpointer.rs @@ -158,6 +158,7 @@ where notified = self.recv.recv(), if !self.shutting_down => { match notified { Some(CheckpointMessage::Namespace(namespace)) => { + tracing::info!(namespace = namespace.as_str(), "notified for checkpoint"); self.scheduled.insert(namespace); } None | Some(CheckpointMessage::Shutdown) => { diff --git a/libsql-wal/src/io/file.rs b/libsql-wal/src/io/file.rs index 63e5cda235..d7f581c145 100644 --- 
a/libsql-wal/src/io/file.rs +++ b/libsql-wal/src/io/file.rs @@ -2,6 +2,8 @@ use std::fs::File; use std::future::Future; use std::io::{self, ErrorKind, IoSlice, Result, Write}; +use libsql_sys::wal::either::Either; + use super::buf::{IoBuf, IoBufMut}; pub trait FileExt: Send + Sync + 'static { @@ -73,6 +75,91 @@ pub trait FileExt: Send + Sync + 'static { ) -> impl Future)> + Send; } +impl FileExt for Either +where V: FileExt, U: FileExt, +{ + fn len(&self) -> io::Result { + match self { + Either::A(x) => x.len(), + Either::B(x) => x.len(), + } + } + + fn write_at_vectored(&self, bufs: &[IoSlice], offset: u64) -> Result { + match self { + Either::A(x) => x.write_at_vectored(bufs, offset), + Either::B(x) => x.write_at_vectored(bufs, offset), + } + } + + fn write_at(&self, buf: &[u8], offset: u64) -> Result { + match self { + Either::A(x) => x.write_at(buf, offset), + Either::B(x) => x.write_at(buf, offset), + } + } + + fn read_at(&self, buf: &mut [u8], offset: u64) -> Result { + match self { + Either::A(x) => x.read_at(buf, offset), + Either::B(x) => x.read_at(buf, offset), + } + } + + fn sync_all(&self) -> Result<()> { + match self { + Either::A(x) => x.sync_all(), + Either::B(x) => x.sync_all(), + } + } + + fn set_len(&self, len: u64) -> Result<()> { + match self { + Either::A(x) => x.set_len(len), + Either::B(x) => x.set_len(len), + } + } + + fn read_exact_at_async( + &self, + buf: B, + offset: u64, + ) -> impl Future)> + Send { + async move { + match self { + Either::A(x) => x.read_exact_at_async(buf, offset).await, + Either::B(x) => x.read_exact_at_async(buf, offset).await, + } + } + } + + fn read_at_async( + &self, + buf: B, + offset: u64, + ) -> impl Future)> + Send { + async move { + match self { + Either::A(x) => x.read_at_async(buf, offset).await, + Either::B(x) => x.read_at_async(buf, offset).await, + } + } + } + + fn write_all_at_async( + &self, + buf: B, + offset: u64, + ) -> impl Future)> + Send { + async move { + match self { + Either::A(x) => 
x.write_all_at_async(buf, offset).await, + Either::B(x) => x.write_all_at_async(buf, offset).await, + } + } + } +} + impl FileExt for File { fn write_at_vectored(&self, bufs: &[IoSlice], offset: u64) -> Result { Ok(nix::sys::uio::pwritev(self, bufs, offset as _)?) diff --git a/libsql-wal/src/io/mod.rs b/libsql-wal/src/io/mod.rs index b7a618617d..daa4e7f904 100644 --- a/libsql-wal/src/io/mod.rs +++ b/libsql-wal/src/io/mod.rs @@ -30,11 +30,17 @@ pub trait Io: Send + Sync + 'static { // todo: create an async counterpart fn tempfile(&self) -> io::Result; fn now(&self) -> DateTime; - fn uuid(&self) -> Uuid; fn hard_link(&self, src: &Path, dst: &Path) -> io::Result<()>; fn with_rng(&self, f: F) -> R where F: FnOnce(&mut Self::Rng) -> R; + fn uuid(&self) -> uuid::Uuid { + self.with_rng(|rng| { + let n: u128 = rng.gen(); + Uuid::from_u128(n) + }) + } + } #[derive(Default, Debug, Clone, Copy)] diff --git a/libsql-wal/src/lib.rs b/libsql-wal/src/lib.rs index 1c0dc63566..e8309541c4 100644 --- a/libsql-wal/src/lib.rs +++ b/libsql-wal/src/lib.rs @@ -15,7 +15,8 @@ const LIBSQL_MAGIC: u64 = u64::from_be_bytes(*b"LIBSQL\0\0"); const LIBSQL_PAGE_SIZE: u16 = 4096; const LIBSQL_WAL_VERSION: u16 = 1; -use zerocopy::byteorder::big_endian::{U16 as bu16, U64 as bu64}; +use uuid::Uuid; +use zerocopy::byteorder::big_endian::{U16 as bu16, U64 as bu64, U128 as bu128}; /// LibsqlFooter is located at the end of the libsql file. I contains libsql specific metadata, /// while remaining fully compatible with sqlite (which just ignores that footer) /// @@ -29,6 +30,14 @@ pub struct LibsqlFooter { /// only valid if there are no outstanding segments to checkpoint, since a checkpoint could be /// partial. 
pub replication_index: bu64, + /// Id of the log for this this database + pub log_id: bu128, +} + +impl LibsqlFooter { + pub fn log_id(&self) -> Uuid { + Uuid::from_u128(self.log_id.get()) + } } #[cfg(any(debug_assertions, test))] @@ -117,6 +126,7 @@ pub mod test { self.tmp.path().join(namespace) } + #[track_caller] pub fn open_conn(&self, namespace: &'static str) -> libsql_sys::Connection> { let path = self.db_path(namespace); let wal = self.wal.clone(); diff --git a/libsql-wal/src/registry.rs b/libsql-wal/src/registry.rs index 103bbf8631..204448ca19 100644 --- a/libsql-wal/src/registry.rs +++ b/libsql-wal/src/registry.rs @@ -1,3 +1,4 @@ +use std::io; use std::num::NonZeroU64; use std::path::{Path, PathBuf}; use std::sync::atomic::{AtomicBool, Ordering}; @@ -9,6 +10,7 @@ use parking_lot::{Condvar, Mutex}; use rand::Rng; use tokio::sync::{mpsc, Notify, Semaphore}; use tokio::task::JoinSet; +use uuid::Uuid; use zerocopy::{AsBytes, FromZeroes}; use crate::checkpointer::CheckpointMessage; @@ -21,6 +23,7 @@ use crate::segment::{current::CurrentSegment, sealed::SealedSegment}; use crate::shared_wal::{SharedWal, SwapLog}; use crate::storage::{OnStoreCallback, Storage}; use crate::transaction::TxGuard; +use crate::{LibsqlFooter, LIBSQL_PAGE_SIZE}; use libsql_sys::name::NamespaceName; enum Slot { @@ -115,6 +118,7 @@ where current.db_size(), current.tail().clone(), salt, + current.log_id() )?; // sealing must the last fallible operation, because we don't want to end up in a situation // where the current log is sealed and it wasn't swapped. @@ -267,10 +271,29 @@ where } let db_file = self.io.open(false, true, true, db_path)?; - let mut header: Sqlite3DbHeader = Sqlite3DbHeader::new_zeroed(); db_file.read_exact_at(header.as_bytes_mut(), 0)?; + let log_id = if db_file.len()? 
<= LIBSQL_PAGE_SIZE as u64 && tail.is_empty() { + // this is a new database + self.io.uuid() + } else if let Some(log_id) = tail.with_head(|h| h.header().log_id.get()) { + // there is a segment list, read the logid from there. + let log_id = Uuid::from_u128(log_id); + #[cfg(debug_assertions)] + { + // if the main db file has footer, then the logid must match that of the segment + if let Ok(db_log_id) = read_log_id_from_footer(&db_file, header.db_size.get() as u64) { + assert_eq!(db_log_id, log_id); + } + } + + log_id + } else { + read_log_id_from_footer(&db_file, header.db_size.get() as u64)? + }; + + let (db_size, next_frame_no) = tail .with_head(|segment| { let header = segment.header(); @@ -294,6 +317,7 @@ where db_size, tail.into(), salt, + log_id, )?)); let (new_frame_notifier, _) = tokio::sync::watch::channel(next_frame_no.get() - 1); @@ -313,6 +337,7 @@ where )), shutdown: false.into(), checkpoint_notifier: self.checkpoint_notifier.clone(), + max_segment_size: 1000.into(), }); self.opened @@ -370,6 +395,19 @@ where .await; self.checkpoint_notifier.closed().await; + // todo: shutdown storage + // self.storage.shutdown().await; + Ok(()) } } + +fn read_log_id_from_footer(db_file: &F, db_size: u64) -> io::Result { + let mut footer: LibsqlFooter = LibsqlFooter::new_zeroed(); + let footer_offset = LIBSQL_PAGE_SIZE as u64 * db_size; + // FIXME: failing to read the footer here is a sign of corrupted database: either we + // have a tail to the segment list, or we have fully checkpointed the database. Can we + // recover from that? 
+ db_file.read_exact_at(footer.as_bytes_mut(), footer_offset)?; + Ok(footer.log_id()) +} diff --git a/libsql-wal/src/replication/injector.rs b/libsql-wal/src/replication/injector.rs index a922330102..3a152b412e 100644 --- a/libsql-wal/src/replication/injector.rs +++ b/libsql-wal/src/replication/injector.rs @@ -57,6 +57,7 @@ impl Injector { .inject_frames(buffer, commit_data, &mut self.tx) .await?; self.buffer = buffer; + self.buffer.clear(); Ok(()) } @@ -82,8 +83,8 @@ mod test { let primary_conn = primary_env.open_conn("test"); let primary_shared = primary_env.shared("test"); - let mut replicator = Replicator::new(primary_shared.clone(), 1); - let stream = replicator.frame_stream(); + let replicator = Replicator::new(primary_shared.clone(), 1); + let stream = replicator.into_frame_stream(); tokio::pin!(stream); diff --git a/libsql-wal/src/replication/replicator.rs b/libsql-wal/src/replication/replicator.rs index acb5230ed7..46937f01a8 100644 --- a/libsql-wal/src/replication/replicator.rs +++ b/libsql-wal/src/replication/replicator.rs @@ -40,20 +40,20 @@ impl Replicator { /// /// In a single replication step, the replicator guarantees that a minimal set of frames is /// sent to the replica. - pub fn frame_stream(&mut self) -> impl Stream>> + '_ { + pub fn into_frame_stream(mut self) -> impl Stream>> + Send { async_stream::try_stream! { loop { // First we decide up to what frame_no we want to replicate in this step. If we are // already up to date, wait for something to happen let most_recent_frame_no = *self .new_frame_notifier - .wait_for(|fno| *fno > self.next_frame_no) + .wait_for(|fno| *fno >= self.next_frame_no) .await .expect("channel cannot be closed because we hold a ref to the sending end"); let mut commit_frame_no = 0; // we have stuff to replicate - if most_recent_frame_no > self.next_frame_no { + if most_recent_frame_no >= self.next_frame_no { // first replicate the most recent version of each page from the current // segment. 
We also return how far we have replicated from the current log let current = self.shared.current.load(); @@ -162,10 +162,10 @@ mod test { .unwrap(); } - let mut replicator = Replicator::new(shared.clone(), 1); + let replicator = Replicator::new(shared.clone(), 1); let tmp = NamedTempFile::new().unwrap(); - let stream = replicator.frame_stream(); + let stream = replicator.into_frame_stream(); tokio::pin!(stream); let mut last_frame_no = 0; let mut size_after; @@ -233,8 +233,8 @@ mod test { // replicate everything from scratch again { let tmp = NamedTempFile::new().unwrap(); - let mut replicator = Replicator::new(shared.clone(), 1); - let stream = replicator.frame_stream(); + let replicator = Replicator::new(shared.clone(), 1); + let stream = replicator.into_frame_stream(); tokio::pin!(stream); @@ -295,8 +295,8 @@ mod test { let db_content = std::fs::read(&env.db_path("test").join("data")).unwrap(); - let mut replicator = Replicator::new(shared, 1); - let stream = replicator.frame_stream().take(3); + let replicator = Replicator::new(shared, 1); + let stream = replicator.into_frame_stream().take(3); tokio::pin!(stream); diff --git a/libsql-wal/src/replication/storage.rs b/libsql-wal/src/replication/storage.rs index 6972c0c6e2..35ea89fb09 100644 --- a/libsql-wal/src/replication/storage.rs +++ b/libsql-wal/src/replication/storage.rs @@ -18,7 +18,7 @@ pub trait ReplicateFromStorage: Sync + Send + 'static { seen: &'a mut RoaringBitmap, current: u64, until: u64, - ) -> Pin>> + 'a>>; + ) -> Pin>> + 'a + Send>>; } pub struct StorageReplicator { @@ -41,7 +41,7 @@ where seen: &'a mut roaring::RoaringBitmap, mut current: u64, until: u64, - ) -> Pin>> + 'a>> { + ) -> Pin>> + Send + 'a>> { Box::pin(async_stream::try_stream! 
{ loop { let key = self.storage.find_segment(&self.namespace, current, None).await?; diff --git a/libsql-wal/src/segment/compacted.rs b/libsql-wal/src/segment/compacted.rs index 9fd65f045b..f8bcf340d5 100644 --- a/libsql-wal/src/segment/compacted.rs +++ b/libsql-wal/src/segment/compacted.rs @@ -52,6 +52,17 @@ pub struct CompactedSegment { file: F, } +impl CompactedSegment { + pub fn remap_file_type(self, f: FN) -> CompactedSegment + where FN: FnOnce(F) -> T, + { + CompactedSegment { + header: self.header, + file: f(self.file) + } + } +} + impl CompactedSegment { pub(crate) async fn open(file: F) -> Result { let buf = ZeroCopyBuf::new_uninit(); diff --git a/libsql-wal/src/segment/current.rs b/libsql-wal/src/segment/current.rs index bda6d5742a..6ab829b515 100644 --- a/libsql-wal/src/segment/current.rs +++ b/libsql-wal/src/segment/current.rs @@ -14,6 +14,7 @@ use fst::MapBuilder; use parking_lot::{Mutex, RwLock}; use roaring::RoaringBitmap; use tokio_stream::Stream; +use uuid::Uuid; use zerocopy::little_endian::U32; use zerocopy::{AsBytes, FromZeroes}; @@ -54,6 +55,7 @@ impl CurrentSegment { db_size: u32, tail: Arc>>, salt: u32, + log_id: Uuid, ) -> Result where F: FileExt, @@ -70,6 +72,7 @@ impl CurrentSegment { version: LIBSQL_WAL_VERSION.into(), salt: salt.into(), page_size: LIBSQL_PAGE_SIZE.into(), + log_id: log_id.as_u128().into(), }; header.recompute_checksum(); @@ -88,6 +91,10 @@ impl CurrentSegment { }) } + pub fn log_id(&self) -> Uuid { + Uuid::from_u128(self.header.lock().log_id.get()) + } + pub fn is_empty(&self) -> bool { self.count_committed() == 0 } diff --git a/libsql-wal/src/segment/list.rs b/libsql-wal/src/segment/list.rs index f1e3252161..4cc5652dae 100644 --- a/libsql-wal/src/segment/list.rs +++ b/libsql-wal/src/segment/list.rs @@ -4,11 +4,11 @@ use std::sync::atomic::{AtomicBool, AtomicUsize, Ordering}; use std::sync::Arc; use arc_swap::ArcSwapOption; -use fst::map::{OpBuilder, Union}; use fst::raw::IndexedValue; use fst::Streamer; use 
roaring::RoaringBitmap; use tokio_stream::Stream; +use uuid::Uuid; use zerocopy::FromZeroes; use crate::error::Result; @@ -78,7 +78,12 @@ where /// Checkpoints as many segments as possible to the main db file, and return the checkpointed /// frame_no, if anything was checkpointed - pub async fn checkpoint(&self, db_file: &F, until_frame_no: u64) -> Result> + pub async fn checkpoint( + &self, + db_file: &F, + until_frame_no: u64, + log_id: Uuid, + ) -> Result> where F: FileExt, { @@ -122,24 +127,15 @@ where let size_after = segs.first().unwrap().size_after(); - let union = segs + let index_iter = segs .iter() - .map(|s| s.index()) - .collect::() - .union(); - - /// Safety: Union contains a Box that doesn't require Send, to it's not send. - /// That's an issue for us, but all the indexes we have are safe to send, so we're good. - /// FIXME: we could implement union ourselves. - unsafe impl Send for SendUnion<'_> {} - unsafe impl Sync for SendUnion<'_> {} - struct SendUnion<'a>(Union<'a>); + .map(|s| s.index()); - let mut union = SendUnion(union); + let mut union = send_fst_ops::SendUnion::from_index_iter(index_iter); let mut buf = ZeroCopyBuf::::new_uninit(); let mut last_replication_index = 0; - while let Some((k, v)) = union.0.next() { + while let Some((k, v)) = union.next() { let page_no = u32::from_be_bytes(k.try_into().unwrap()); let v = v.iter().min_by_key(|i| i.index).unwrap(); let offset = v.value as u32; @@ -163,6 +159,7 @@ where magic: LIBSQL_MAGIC.into(), version: LIBSQL_WAL_VERSION.into(), replication_index: last_replication_index.into(), + log_id: log_id.as_u128().into(), }; let footer_offset = size_after as usize * LIBSQL_PAGE_SIZE as usize; @@ -241,7 +238,8 @@ where .max(until_fno); let stream = async_stream::try_stream! 
{ - let mut union = fst::map::OpBuilder::from_iter(segments.iter().map(|s| s.index())).union(); + let index_iter = segments.iter().map(|s| s.index()); + let mut union = send_fst_ops::SendUnion::from_index_iter(index_iter); while let Some((key_bytes, indexes)) = union.next() { let page_no = u32::from_be_bytes(key_bytes.try_into().unwrap()); // we already have a more recent version of this page. @@ -337,6 +335,47 @@ impl List { } } +mod send_fst_ops { + use std::sync::Arc; + use std::ops::{Deref, DerefMut}; + + use fst::map::{OpBuilder, Union}; + + /// Safety: Union contains a Box that doesn't require Send, to it's not send. + /// That's an issue for us, but all the indexes we have are safe to send, so we're good. + /// FIXME: we could implement union ourselves. + unsafe impl Send for SendUnion<'_> {} + unsafe impl Sync for SendUnion<'_> {} + + #[repr(transparent)] + pub(super) struct SendUnion<'a>(Union<'a>); + + impl<'a> SendUnion<'a> { + pub fn from_index_iter(iter: I) -> Self + where + I: Iterator>>, + { + let op = iter.collect::().union(); + Self(op) + } + } + + impl<'a> Deref for SendUnion<'a> { + type Target = Union<'a>; + + fn deref(&self) -> &Self::Target { + &self.0 + } + } + + impl<'a> DerefMut for SendUnion<'a> { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } + } + +} + #[cfg(test)] mod test { use std::io::{Read, Seek, Write}; diff --git a/libsql-wal/src/segment/mod.rs b/libsql-wal/src/segment/mod.rs index 98d93bfe50..3f96589526 100644 --- a/libsql-wal/src/segment/mod.rs +++ b/libsql-wal/src/segment/mod.rs @@ -15,7 +15,7 @@ use std::mem::size_of; use std::num::NonZeroU64; use std::sync::Arc; -use zerocopy::byteorder::little_endian::{U16, U32, U64}; +use zerocopy::byteorder::little_endian::{U16, U32, U64, U128}; use zerocopy::AsBytes; use crate::error::{Error, Result}; @@ -62,6 +62,7 @@ pub struct SegmentHeader { /// right now we only support 4096, but if se decided to support other sizes, /// we could do it without changing the header 
pub page_size: U16, + pub log_id: U128, /// checksum of the header fields, excluding the checksum itself. This field must be the last pub header_cheksum: U32, diff --git a/libsql-wal/src/shared_wal.rs b/libsql-wal/src/shared_wal.rs index 461ad13e03..09c107bf24 100644 --- a/libsql-wal/src/shared_wal.rs +++ b/libsql-wal/src/shared_wal.rs @@ -1,5 +1,5 @@ use std::collections::BTreeMap; -use std::sync::atomic::{AtomicBool, AtomicU64, Ordering}; +use std::sync::atomic::{AtomicBool, AtomicU64, AtomicUsize, Ordering}; use std::sync::Arc; use std::time::Instant; @@ -8,6 +8,7 @@ use crossbeam::deque::Injector; use crossbeam::sync::Unparker; use parking_lot::{Mutex, MutexGuard}; use tokio::sync::mpsc; +use uuid::Uuid; use crate::checkpointer::CheckpointMessage; use crate::error::{Error, Result}; @@ -51,6 +52,8 @@ pub struct SharedWal { pub(crate) stored_segments: Box, pub(crate) shutdown: AtomicBool, pub(crate) checkpoint_notifier: mpsc::Sender, + /// maximum size the segment is allowed to grow + pub(crate) max_segment_size: AtomicUsize, } impl SharedWal { @@ -73,6 +76,10 @@ impl SharedWal { self.current.load().db_size() } + pub fn log_id(&self) -> Uuid { + self.current.load().log_id() + } + #[tracing::instrument(skip_all)] pub fn begin_read(&self, conn_id: u64) -> ReadTransaction { // FIXME: this is not enough to just increment the counter, we must make sure that the segment @@ -230,21 +237,6 @@ impl SharedWal { } } - // The replication index from page 1 must match that of the SharedWal - #[cfg(debug_assertions)] - { - use libsql_sys::ffi::Sqlite3DbHeader; - use zerocopy::FromBytes; - - if page_no == 1 { - let header = Sqlite3DbHeader::read_from_prefix(buffer).unwrap(); - assert_eq!( - header.replication_index.get(), - self.checkpointed_frame_no.load(Ordering::Relaxed) - ); - } - } - tx.pages_read += 1; Ok(()) @@ -264,7 +256,7 @@ impl SharedWal { } // TODO: use config for max log size - if tx.is_commited() && current.count_committed() > 1000 { + if tx.is_commited() && 
current.count_committed() > self.max_segment_size.load(Ordering::Relaxed) { self.swap_current(&tx)?; } @@ -297,8 +289,9 @@ impl SharedWal { .current .load() .tail() - .checkpoint(&self.db_file, durable_frame_no) + .checkpoint(&self.db_file, durable_frame_no, self.log_id()) .await?; + dbg!(checkpointed_frame_no); if let Some(checkpointed_frame_no) = checkpointed_frame_no { self.checkpointed_frame_no .store(checkpointed_frame_no, Ordering::SeqCst); diff --git a/libsql-wal/src/storage/async_storage.rs b/libsql-wal/src/storage/async_storage.rs index aca4cbe6c4..24347655ac 100644 --- a/libsql-wal/src/storage/async_storage.rs +++ b/libsql-wal/src/storage/async_storage.rs @@ -1,7 +1,6 @@ //! `AsyncStorage` is a `Storage` implementation that defer storage to a background thread. The //! durable frame_no is notified asynchronously. -use std::any::Any; use std::sync::Arc; use chrono::Utc; @@ -23,9 +22,9 @@ use super::{OnStoreCallback, RestoreOptions, Storage, StoreSegmentRequest}; /// /// On shutdown, attempts to empty the queue, and flush the receiver. When the last handle of the /// receiver is dropped, and the queue is empty, exit. 
-pub struct AsyncStorageLoop { - receiver: mpsc::UnboundedReceiver>, - scheduler: Scheduler, +pub struct AsyncStorageLoop { + receiver: mpsc::UnboundedReceiver>, + scheduler: Scheduler, backend: Arc, io: Arc, max_in_flight: usize, @@ -114,19 +113,12 @@ where &self, namespace: NamespaceName, ret: oneshot::Sender>, - config_override: Option>, + config_override: Option, ) { let backend = self.backend.clone(); - let config = match config_override - .map(|c| c.downcast::()) - .transpose() - { - Ok(Some(config)) => config, - Ok(None) => backend.default_config(), - Err(_) => { - let _ = ret.send(Err(super::Error::InvalidConfigType)); - return; - } + let config = match config_override { + Some(config) => config, + None => backend.default_config(), }; tokio::spawn(async move { @@ -147,18 +139,18 @@ pub struct BottomlessConfig { pub config: C, } -enum StorageLoopMessage { - StoreReq(StoreSegmentRequest), +enum StorageLoopMessage { + StoreReq(StoreSegmentRequest), DurableFrameNoReq { namespace: NamespaceName, - config_override: Option>, + config_override: Option, ret: oneshot::Sender>, }, } -pub struct AsyncStorage { +pub struct AsyncStorage { /// send request to the main loop - job_sender: mpsc::UnboundedSender>, + job_sender: mpsc::UnboundedSender>, force_shutdown: oneshot::Sender<()>, backend: Arc, } @@ -175,18 +167,14 @@ where &self, namespace: &NamespaceName, segment: Self::Segment, - config_override: Option>, + config_override: Option, on_store_callback: OnStoreCallback, ) { - fn into_any(t: Arc) -> Arc { - t - } - let req = StoreSegmentRequest { namespace: namespace.clone(), segment, created_at: Utc::now(), - storage_config_override: config_override.map(into_any), + storage_config_override: config_override, on_store_callback, }; @@ -198,7 +186,7 @@ where async fn durable_frame_no( &self, namespace: &NamespaceName, - config_override: Option>, + config_override: Option, ) -> u64 { let config = config_override.unwrap_or_else(|| self.backend.default_config()); let meta = 
self.backend.meta(&config, namespace).await.unwrap(); @@ -210,7 +198,7 @@ where file: impl crate::io::FileExt, namespace: &NamespaceName, restore_options: RestoreOptions, - config_override: Option>, + config_override: Option, ) -> super::Result<()> { let config = config_override.unwrap_or_else(|| self.backend.default_config()); self.backend @@ -221,7 +209,7 @@ where fn durable_frame_no_sync( &self, namespace: &NamespaceName, - config_override: Option>, + config_override: Option, ) -> u64 { tokio::runtime::Handle::current() .block_on(self.durable_frame_no(namespace, config_override)) @@ -231,7 +219,7 @@ where &self, namespace: &NamespaceName, frame_no: u64, - config_override: Option>, + config_override: Option, ) -> super::Result { let config = config_override.unwrap_or_else(|| self.backend.default_config()); let key = self @@ -245,7 +233,7 @@ where &self, namespace: &NamespaceName, key: &super::SegmentKey, - config_override: Option>, + config_override: Option, ) -> super::Result>> { let config = config_override.unwrap_or_else(|| self.backend.default_config()); let index = self @@ -259,7 +247,7 @@ where &self, namespace: &NamespaceName, key: &super::SegmentKey, - config_override: Option>, + config_override: Option, ) -> super::Result> { // TODO: make async let config = config_override.unwrap_or_else(|| self.backend.default_config()); diff --git a/libsql-wal/src/storage/backend/mod.rs b/libsql-wal/src/storage/backend/mod.rs index 6c40903eb2..25cc57f1a0 100644 --- a/libsql-wal/src/storage/backend/mod.rs +++ b/libsql-wal/src/storage/backend/mod.rs @@ -31,7 +31,7 @@ pub struct DbMeta { pub trait Backend: Send + Sync + 'static { /// Config type associated with the Storage - type Config: Send + Sync + 'static; + type Config: Clone + Send + Sync + 'static; /// Store `segment_data` with its associated `meta` fn store( @@ -42,19 +42,19 @@ pub trait Backend: Send + Sync + 'static { segment_index: Vec, ) -> impl Future> + Send; - async fn find_segment( + fn find_segment( 
&self, config: &Self::Config, namespace: &NamespaceName, frame_no: u64, - ) -> Result; + ) -> impl Future> + Send; - async fn fetch_segment_index( + fn fetch_segment_index( &self, config: &Self::Config, namespace: &NamespaceName, key: &SegmentKey, - ) -> Result>>; + ) -> impl Future>>> + Send; /// Fetch a segment for `namespace` containing `frame_no`, and writes it to `dest`. async fn fetch_segment_data_to_file( @@ -67,12 +67,12 @@ pub trait Backend: Send + Sync + 'static { // this method taking self: Arc is an infortunate consequence of rust type system making // impl FileExt variant with all the arguments, with no escape hatch... - async fn fetch_segment_data( + fn fetch_segment_data( self: Arc, - config: Arc, + config: Self::Config, namespace: NamespaceName, key: SegmentKey, - ) -> Result; + ) -> impl Future> + Send; // /// Fetch a segment for `namespace` containing `frame_no`, and writes it to `dest`. async fn fetch_segment( @@ -99,7 +99,7 @@ pub trait Backend: Send + Sync + 'static { ) -> Result<()>; /// Returns the default configuration for this storage - fn default_config(&self) -> Arc; + fn default_config(&self) -> Self::Config; } impl Backend for Arc { @@ -132,7 +132,7 @@ impl Backend for Arc { self.as_ref().meta(config, namespace).await } - fn default_config(&self) -> Arc { + fn default_config(&self) -> Self::Config { self.as_ref().default_config() } @@ -184,7 +184,7 @@ impl Backend for Arc { async fn fetch_segment_data( self: Arc, - config: Arc, + config: Self::Config, namespace: NamespaceName, key: SegmentKey, ) -> Result { diff --git a/libsql-wal/src/storage/backend/s3.rs b/libsql-wal/src/storage/backend/s3.rs index ee09169961..811c84178b 100644 --- a/libsql-wal/src/storage/backend/s3.rs +++ b/libsql-wal/src/storage/backend/s3.rs @@ -334,7 +334,7 @@ impl Backend for S3Backend where IO: Io, { - type Config = S3Config; + type Config = Arc; async fn store( &self, @@ -425,7 +425,7 @@ where }) } - fn default_config(&self) -> Arc { + fn default_config(&self) 
-> Self::Config { self.default_config.clone() } @@ -489,7 +489,7 @@ where async fn fetch_segment_data( self: Arc, - config: Arc, + config: Self::Config, namespace: NamespaceName, key: SegmentKey, ) -> Result { @@ -650,11 +650,11 @@ mod tests { let dir = tempfile::tempdir().unwrap(); let (aws_config, _s3) = setup(&dir); - let s3_config = S3Config { + let s3_config = Arc::new(S3Config { bucket: "testbucket".into(), aws_config: aws_config.clone(), cluster_id: "123456789".into(), - }; + }); let storage = S3Backend::from_sdk_config_with_io( aws_config, diff --git a/libsql-wal/src/storage/job.rs b/libsql-wal/src/storage/job.rs index 2f4192b8df..bba4025224 100644 --- a/libsql-wal/src/storage/job.rs +++ b/libsql-wal/src/storage/job.rs @@ -9,13 +9,13 @@ use crate::segment::Segment; /// A request, with an id #[derive(Debug)] -pub(crate) struct IndexedRequest { - pub(crate) request: StoreSegmentRequest, +pub(crate) struct IndexedRequest { + pub(crate) request: StoreSegmentRequest, pub(crate) id: u64, } -impl Deref for IndexedRequest { - type Target = StoreSegmentRequest; +impl Deref for IndexedRequest { + type Target = StoreSegmentRequest; fn deref(&self) -> &Self::Target { &self.request @@ -24,32 +24,21 @@ impl Deref for IndexedRequest { /// A storage Job to be performed #[derive(Debug)] -pub(crate) struct Job { +pub(crate) struct Job { /// Segment to store. // TODO: implement request batching (merge segment and send). - pub(crate) request: IndexedRequest, + pub(crate) request: IndexedRequest, } -// #[repr(transparent)] -// struct BytesLike(pub T); -// -// impl AsRef<[u8]> for BytesLike -// where -// T: AsBytes, -// { -// fn as_ref(&self) -> &[u8] { -// self.0.as_bytes() -// } -// } -// -impl Job +impl Job where Seg: Segment, + C: Clone, { /// Perform the job and return the JobResult. This is not allowed to panic. 
- pub(crate) async fn perform(self, backend: B, io: IO) -> JobResult + pub(crate) async fn perform(self, backend: B, io: IO) -> JobResult where - B: Backend, + B: Backend, IO: Io, { let result = self.try_perform(backend, io).await; @@ -58,13 +47,15 @@ where async fn try_perform(&self, backend: B, io: IO) -> Result where - B: Backend, + B: Backend, IO: Io, { let segment = &self.request.segment; let segment_id = io.uuid(); let tmp = io.tempfile()?; + tracing::debug!(namespace = self.request.namespace.as_str(), "sending segment to durable storage"); + let new_index = segment .compact(&tmp, segment_id) .await @@ -81,21 +72,25 @@ where .request .storage_config_override .clone() - .map(|c| c.downcast::()) - .transpose() - .map_err(|_| super::Error::InvalidConfigType)? .unwrap_or_else(|| backend.default_config()); backend.store(&config, meta, tmp, new_index).await?; + tracing::info!( + namespace = self.request.namespace.as_str(), + start_frame_no = segment.start_frame_no(), + end_frame_no = segment.last_committed(), + "stored segment" + ); + Ok(segment.last_committed()) } } #[derive(Debug)] -pub(crate) struct JobResult { +pub(crate) struct JobResult { /// The job that was performed - pub(crate) job: Job, + pub(crate) job: Job, /// The outcome of the job: the new durable index, or an error. 
pub(crate) result: Result, } @@ -453,8 +448,8 @@ mod test { todo!() } - fn default_config(&self) -> Arc { - Arc::new(()) + fn default_config(&self) -> Self::Config { + () } async fn restore( @@ -497,7 +492,7 @@ mod test { async fn fetch_segment_data( self: Arc, - _config: Arc, + _config: Self::Config, _namespace: NamespaceName, _key: SegmentKey, ) -> Result { diff --git a/libsql-wal/src/storage/mod.rs b/libsql-wal/src/storage/mod.rs index 76347de96b..757c672ce6 100644 --- a/libsql-wal/src/storage/mod.rs +++ b/libsql-wal/src/storage/mod.rs @@ -1,4 +1,3 @@ -use std::any::Any; use std::collections::BTreeMap; use std::fmt; use std::future::Future; @@ -11,7 +10,7 @@ use chrono::{DateTime, Utc}; use fst::Map; use hashbrown::HashMap; use libsql_sys::name::NamespaceName; -use parking_lot::Mutex; +use libsql_sys::wal::either::Either; use tempfile::{tempdir, TempDir}; use crate::io::{FileExt, Io, StdIO}; @@ -133,7 +132,7 @@ pub type OnStoreCallback = Box< pub trait Storage: Send + Sync + 'static { type Segment: Segment; - type Config; + type Config: Clone + Send; /// store the passed segment for `namespace`. This function is called in a context where /// blocking is acceptable. 
/// returns a future that resolves when the segment is stored @@ -142,20 +141,20 @@ pub trait Storage: Send + Sync + 'static { &self, namespace: &NamespaceName, seg: Self::Segment, - config_override: Option>, + config_override: Option, on_store: OnStoreCallback, ); fn durable_frame_no_sync( &self, namespace: &NamespaceName, - config_override: Option>, + config_override: Option, ) -> u64; async fn durable_frame_no( &self, namespace: &NamespaceName, - config_override: Option>, + config_override: Option, ) -> u64; async fn restore( @@ -163,29 +162,148 @@ pub trait Storage: Send + Sync + 'static { file: impl FileExt, namespace: &NamespaceName, restore_options: RestoreOptions, - config_override: Option>, + config_override: Option, ) -> Result<()>; - async fn find_segment( + fn find_segment( &self, namespace: &NamespaceName, frame_no: u64, - config_override: Option>, - ) -> Result; + config_override: Option, + ) -> impl Future> + Send; - async fn fetch_segment_index( + fn fetch_segment_index( &self, namespace: &NamespaceName, key: &SegmentKey, - config_override: Option>, - ) -> Result>>; + config_override: Option, + ) -> impl Future>>> + Send; - async fn fetch_segment_data( + fn fetch_segment_data( &self, namespace: &NamespaceName, key: &SegmentKey, - config_override: Option>, - ) -> Result>; + config_override: Option, + ) -> impl Future>> + Send; +} + +/// special zip function for Either storage implementation +fn zip(x: &Either, y: Option>) -> Either<(&A, Option), (&B, Option)>{ + match (x, y) { + (Either::A(a), Some(Either::A(c))) => Either::A((a, Some(c))), + (Either::B(b), Some(Either::B(d))) => Either::B((b, Some(d))), + (Either::A(a), None) => Either::A((a, None)), + (Either::B(b), None) => Either::B((b, None)), + _ => panic!("incompatible options") + } +} + +impl Storage for Either +where A: Storage, + B: Storage, + S: Segment, +{ + type Segment = S; + type Config = Either; + + fn store( + &self, + namespace: &NamespaceName, + seg: Self::Segment, + 
config_override: Option, + on_store: OnStoreCallback, + ) { + + match zip(self, config_override) { + Either::A((s, c)) => s.store(namespace, seg, c, on_store), + Either::B((s, c)) => s.store(namespace, seg, c, on_store), + } + } + + fn durable_frame_no_sync( + &self, + namespace: &NamespaceName, + config_override: Option, + ) -> u64 { + match zip(self, config_override) { + Either::A((s, c)) => s.durable_frame_no_sync(namespace, c), + Either::B((s, c)) => s.durable_frame_no_sync(namespace, c), + } + } + + async fn durable_frame_no( + &self, + namespace: &NamespaceName, + config_override: Option, + ) -> u64 { + match zip(self, config_override) { + Either::A((s, c)) => s.durable_frame_no(namespace, c).await, + Either::B((s, c)) => s.durable_frame_no(namespace, c).await, + } + } + + async fn restore( + &self, + file: impl FileExt, + namespace: &NamespaceName, + restore_options: RestoreOptions, + config_override: Option, + ) -> Result<()> { + match zip(self, config_override) { + Either::A((s, c)) => s.restore(file, namespace, restore_options, c).await, + Either::B((s, c)) => s.restore(file, namespace, restore_options, c).await, + } + } + + fn find_segment( + &self, + namespace: &NamespaceName, + frame_no: u64, + config_override: Option, + ) -> impl Future> + Send { + async move { + match zip(self, config_override) { + Either::A((s, c)) => s.find_segment(namespace, frame_no, c).await, + Either::B((s, c)) => s.find_segment(namespace, frame_no, c).await, + } + } + } + + fn fetch_segment_index( + &self, + namespace: &NamespaceName, + key: &SegmentKey, + config_override: Option, + ) -> impl Future>>> + Send { + async move { + match zip(self, config_override) { + Either::A((s, c)) => s.fetch_segment_index(namespace, key, c).await, + Either::B((s, c)) => s.fetch_segment_index(namespace, key, c).await, + } + } + } + + fn fetch_segment_data( + &self, + namespace: &NamespaceName, + key: &SegmentKey, + config_override: Option, + ) -> impl Future>> + Send { + async move { + match 
zip(self, config_override) { + Either::A((s, c)) => { + let seg = s.fetch_segment_data(namespace, key, c).await?; + let seg = seg.remap_file_type(Either::A); + Ok(seg) + }, + Either::B((s, c)) => { + let seg = s.fetch_segment_data(namespace, key, c).await?; + let seg = seg.remap_file_type(Either::B); + Ok(seg) + }, + } + } + } } /// a placeholder storage that doesn't store segment @@ -200,7 +318,7 @@ impl Storage for NoStorage { &self, _namespace: &NamespaceName, _seg: Self::Segment, - _config: Option>, + _config: Option, _on_store: OnStoreCallback, ) { } @@ -208,7 +326,7 @@ impl Storage for NoStorage { async fn durable_frame_no( &self, namespace: &NamespaceName, - config: Option>, + config: Option, ) -> u64 { self.durable_frame_no_sync(namespace, config) } @@ -218,7 +336,7 @@ impl Storage for NoStorage { _file: impl FileExt, _namespace: &NamespaceName, _restore_options: RestoreOptions, - _config_override: Option>, + _config_override: Option, ) -> Result<()> { panic!("can restore from no storage") } @@ -226,7 +344,7 @@ impl Storage for NoStorage { fn durable_frame_no_sync( &self, _namespace: &NamespaceName, - _config_override: Option>, + _config_override: Option, ) -> u64 { u64::MAX } @@ -235,7 +353,7 @@ impl Storage for NoStorage { &self, _namespace: &NamespaceName, _frame_no: u64, - _config_override: Option>, + _config_override: Option, ) -> Result { unimplemented!() } @@ -244,7 +362,7 @@ impl Storage for NoStorage { &self, _namespace: &NamespaceName, _key: &SegmentKey, - _config_override: Option>, + _config_override: Option, ) -> Result>> { unimplemented!() } @@ -253,7 +371,7 @@ impl Storage for NoStorage { &self, _namespace: &NamespaceName, _key: &SegmentKey, - _config_override: Option>, + _config_override: Option, ) -> Result> { unimplemented!(); #[allow(unreachable_code)] @@ -264,7 +382,7 @@ impl Storage for NoStorage { #[doc(hidden)] #[derive(Debug)] pub struct TestStorage { - inner: Arc>>, + inner: Arc>>, } #[derive(Debug)] @@ -318,10 +436,10 @@ impl 
Storage for TestStorage { &self, namespace: &NamespaceName, seg: Self::Segment, - _config: Option>, + _config: Option, on_store: OnStoreCallback, ) { - let mut inner = self.inner.lock(); + let mut inner = self.inner.lock_blocking(); if inner.store { let id = uuid::Uuid::new_v4(); let out_path = inner.dir.path().join(id.to_string()); @@ -347,7 +465,7 @@ impl Storage for TestStorage { async fn durable_frame_no( &self, namespace: &NamespaceName, - config: Option>, + config: Option, ) -> u64 { self.durable_frame_no_sync(namespace, config) } @@ -357,7 +475,7 @@ impl Storage for TestStorage { _file: impl FileExt, _namespace: &NamespaceName, _restore_options: RestoreOptions, - _config_override: Option>, + _config_override: Option, ) -> Result<()> { todo!(); } @@ -365,9 +483,9 @@ impl Storage for TestStorage { fn durable_frame_no_sync( &self, namespace: &NamespaceName, - _config_override: Option>, + _config_override: Option, ) -> u64 { - let inner = self.inner.lock(); + let inner = self.inner.lock_blocking(); if inner.store { let Some(segs) = inner.stored.get(namespace) else { return 0; @@ -382,9 +500,9 @@ impl Storage for TestStorage { &self, namespace: &NamespaceName, frame_no: u64, - _config_override: Option>, + _config_override: Option, ) -> Result { - let inner = self.inner.lock(); + let inner = self.inner.lock().await; if inner.store { if let Some(segs) = inner.stored.get(namespace) { let Some((key, _path)) = segs.iter().find(|(k, _)| k.includes(frame_no)) else { @@ -403,9 +521,9 @@ impl Storage for TestStorage { &self, namespace: &NamespaceName, key: &SegmentKey, - _config_override: Option>, + _config_override: Option, ) -> Result>> { - let inner = self.inner.lock(); + let inner = self.inner.lock().await; if inner.store { match inner.stored.get(namespace) { Some(segs) => Ok(segs.get(&key).unwrap().1.clone()), @@ -420,9 +538,9 @@ impl Storage for TestStorage { &self, namespace: &NamespaceName, key: &SegmentKey, - _config_override: Option>, + _config_override: Option, 
) -> Result> { - let inner = self.inner.lock(); + let inner = self.inner.lock().await; if inner.store { match inner.stored.get(namespace) { Some(segs) => { @@ -438,7 +556,7 @@ impl Storage for TestStorage { } } -pub struct StoreSegmentRequest { +pub struct StoreSegmentRequest { namespace: NamespaceName, /// Path to the segment. Read-only for bottomless segment: S, @@ -447,12 +565,12 @@ pub struct StoreSegmentRequest { /// alternative configuration to use with the storage layer. /// e.g: S3 overrides - storage_config_override: Option>, + storage_config_override: Option, /// Called after the segment was stored, with the new durable index on_store_callback: OnStoreCallback, } -impl fmt::Debug for StoreSegmentRequest +impl fmt::Debug for StoreSegmentRequest where S: fmt::Debug, { diff --git a/libsql-wal/src/storage/scheduler.rs b/libsql-wal/src/storage/scheduler.rs index 8e07deaeb2..a9de9a746a 100644 --- a/libsql-wal/src/storage/scheduler.rs +++ b/libsql-wal/src/storage/scheduler.rs @@ -5,13 +5,13 @@ use super::job::{IndexedRequest, Job, JobResult}; use super::StoreSegmentRequest; use libsql_sys::name::NamespaceName; -struct NamespaceRequests { - requests: VecDeque>, +struct NamespaceRequests { + requests: VecDeque>, /// there's work in flight for this namespace in_flight: bool, } -impl Default for NamespaceRequests { +impl Default for NamespaceRequests { fn default() -> Self { Self { requests: Default::default(), @@ -28,14 +28,14 @@ impl Default for NamespaceRequests { /// processed, because only the most recent segment is checked for durability. This property /// ensures that all segments are present up to the max durable index. 
/// It is generic over C: the storage config type (for config overrides), and T, the segment type -pub(crate) struct Scheduler { +pub(crate) struct Scheduler { /// notify new durability index for namespace - requests: HashMap>, + requests: HashMap>, queue: priority_queue::PriorityQueue>, next_request_id: u64, } -impl Scheduler { +impl Scheduler { pub fn new() -> Self { Self { requests: Default::default(), @@ -46,7 +46,7 @@ impl Scheduler { /// Register a new request with the scheduler #[tracing::instrument(skip_all)] - pub fn register(&mut self, request: StoreSegmentRequest) { + pub fn register(&mut self, request: StoreSegmentRequest) { // invariant: new segment comes immediately after the latest segment for that namespace. This means: // - immediately after the last registered segment, if there is any // - immediately after the last durable index @@ -71,7 +71,7 @@ impl Scheduler { /// be scheduled, and returns description of the job to be performed. No other job for this /// namespace will be scheduled, until the `JobResult` is reported #[tracing::instrument(skip_all)] - pub fn schedule(&mut self) -> Option> { + pub fn schedule(&mut self) -> Option> { let (name, _) = self.queue.pop()?; let requests = self .requests @@ -90,7 +90,7 @@ impl Scheduler { /// Report the job result to the scheduler. 
If the job result was a success, the request as /// removed from the queue, else, the job is rescheduled #[tracing::instrument(skip_all, fields(req_id = result.job.request.id))] - pub async fn report(&mut self, result: JobResult) { + pub async fn report(&mut self, result: JobResult) { // re-schedule, or report new max durable frame_no for segment let name = result.job.request.request.namespace.clone(); let requests = self @@ -151,7 +151,7 @@ mod test { #[tokio::test] async fn schedule_simple() { - let mut scheduler = Scheduler::<()>::new(); + let mut scheduler = Scheduler::<(), ()>::new(); let ns1 = NamespaceName::from("test1"); let ns2 = NamespaceName::from("test2"); @@ -224,7 +224,7 @@ mod test { #[tokio::test] async fn job_error_reschedule() { - let mut scheduler = Scheduler::<()>::new(); + let mut scheduler = Scheduler::<(), ()>::new(); let ns1 = NamespaceName::from("test1"); let ns2 = NamespaceName::from("test2"); @@ -264,7 +264,7 @@ mod test { #[tokio::test] async fn schedule_while_in_flight() { - let mut scheduler = Scheduler::<()>::new(); + let mut scheduler = Scheduler::<(), ()>::new(); let ns1 = NamespaceName::from("test1"); diff --git a/libsql-wal/tests/flaky_fs.rs b/libsql-wal/tests/flaky_fs.rs index 9ccc48fd21..cde3c3419b 100644 --- a/libsql-wal/tests/flaky_fs.rs +++ b/libsql-wal/tests/flaky_fs.rs @@ -144,10 +144,6 @@ impl Io for FlakyIo { todo!() } - fn uuid(&self) -> uuid::Uuid { - todo!() - } - fn hard_link(&self, _src: &Path, _dst: &Path) -> std::io::Result<()> { todo!() } From 7aeaa55f41c434c10e72a413bf09e9ec0763bcf6 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Wed, 14 Aug 2024 18:19:14 +0200 Subject: [PATCH 47/50] remove segment after checkpoint --- libsql-wal/src/io/mod.rs | 11 ++++++++++- libsql-wal/src/registry.rs | 5 +++-- libsql-wal/src/segment/list.rs | 17 ++++++++++------- libsql-wal/src/segment/mod.rs | 7 +++++++ libsql-wal/src/segment/sealed.rs | 8 ++++++++ libsql-wal/src/shared_wal.rs | 5 ++++- 6 files changed, 42 insertions(+), 11 
deletions(-) diff --git a/libsql-wal/src/io/mod.rs b/libsql-wal/src/io/mod.rs index daa4e7f904..4f370065aa 100644 --- a/libsql-wal/src/io/mod.rs +++ b/libsql-wal/src/io/mod.rs @@ -1,4 +1,4 @@ -use std::io; +use std::{future::Future, io}; use std::path::Path; use std::sync::Arc; @@ -41,6 +41,7 @@ pub trait Io: Send + Sync + 'static { }) } + fn remove_file_async(&self, path: &Path) -> impl Future> + Send; } #[derive(Default, Debug, Clone, Copy)] @@ -91,6 +92,10 @@ impl Io for StdIO { { f(&mut thread_rng()) } + + async fn remove_file_async(&self, path: &Path) -> io::Result<()> { + tokio::fs::remove_file(path).await + } } impl Io for Arc { @@ -134,6 +139,10 @@ impl Io for Arc { { self.as_ref().with_rng(f) } + + async fn remove_file_async(&self, path: &Path) ->io::Result<()> { + self.as_ref().remove_file_async(path).await + } } pub struct Inspect { diff --git a/libsql-wal/src/registry.rs b/libsql-wal/src/registry.rs index 204448ca19..20217a0005 100644 --- a/libsql-wal/src/registry.rs +++ b/libsql-wal/src/registry.rs @@ -36,7 +36,7 @@ enum Slot { /// Wal Registry maintains a set of shared Wal, and their respective set of files. 
pub struct WalRegistry { - io: IO, + io: Arc, path: PathBuf, shutdown: AtomicBool, opened: DashMap>, @@ -63,7 +63,7 @@ impl WalRegistry { ) -> Result { io.create_dir_all(&path)?; let registry = Self { - io, + io: io.into(), path, opened: Default::default(), shutdown: Default::default(), @@ -338,6 +338,7 @@ where shutdown: false.into(), checkpoint_notifier: self.checkpoint_notifier.clone(), max_segment_size: 1000.into(), + io: self.io.clone(), }); self.opened diff --git a/libsql-wal/src/segment/list.rs b/libsql-wal/src/segment/list.rs index 4cc5652dae..330a31e07d 100644 --- a/libsql-wal/src/segment/list.rs +++ b/libsql-wal/src/segment/list.rs @@ -13,7 +13,7 @@ use zerocopy::FromZeroes; use crate::error::Result; use crate::io::buf::{ZeroCopyBoxIoBuf, ZeroCopyBuf}; -use crate::io::FileExt; +use crate::io::{FileExt, Io}; use crate::segment::Frame; use crate::{LibsqlFooter, LIBSQL_MAGIC, LIBSQL_PAGE_SIZE, LIBSQL_WAL_VERSION}; @@ -78,14 +78,13 @@ where /// Checkpoints as many segments as possible to the main db file, and return the checkpointed /// frame_no, if anything was checkpointed - pub async fn checkpoint( + pub async fn checkpoint( &self, - db_file: &F, + db_file: &IO::File, until_frame_no: u64, log_id: Uuid, - ) -> Result> - where - F: FileExt, + io: &IO, + ) -> Result> { struct Guard<'a>(&'a AtomicBool); impl<'a> Drop for Guard<'a> { @@ -170,9 +169,13 @@ where // todo: truncate if necessary - //// todo: make async + //// TODO: make async db_file.sync_all()?; + for seg in segs.iter() { + seg.destroy(io).await; + } + let mut current = self.head.compare_and_swap(&segs[0], None); if Arc::ptr_eq(&segs[0], current.as_ref().unwrap()) { // nothing to do diff --git a/libsql-wal/src/segment/mod.rs b/libsql-wal/src/segment/mod.rs index 3f96589526..2dc6c7c1fb 100644 --- a/libsql-wal/src/segment/mod.rs +++ b/libsql-wal/src/segment/mod.rs @@ -21,6 +21,7 @@ use zerocopy::AsBytes; use crate::error::{Error, Result}; use crate::io::buf::IoBufMut; use crate::io::FileExt; +use 
crate::io::Io; use crate::LIBSQL_MAGIC; use crate::LIBSQL_PAGE_SIZE; @@ -168,6 +169,8 @@ pub trait Segment: Send + Sync + 'static { async fn read_frame_offset_async(&self, offset: u32, buf: B) -> (B, Result<()>) where B: IoBufMut + Send + 'static; + + fn destroy(&self, io: &IO) -> impl Future; } impl Segment for Arc { @@ -209,6 +212,10 @@ impl Segment for Arc { fn size_after(&self) -> u32 { self.as_ref().size_after() } + + fn destroy(&self, io: &IO) -> impl Future { + self.as_ref().destroy(io) + } } #[repr(C)] diff --git a/libsql-wal/src/segment/sealed.rs b/libsql-wal/src/segment/sealed.rs index 39f8cc9039..ddc71f4168 100644 --- a/libsql-wal/src/segment/sealed.rs +++ b/libsql-wal/src/segment/sealed.rs @@ -183,6 +183,14 @@ where fn size_after(&self) -> u32 { self.header().size_after() } + + fn destroy(&self, io: &IO) -> impl std::future::Future { + async move { + if let Err(e) = io.remove_file_async(&self.path).await { + tracing::error!("failed to remove segment file {:?}: {e}", self.path); + } + } + } } impl SealedSegment { diff --git a/libsql-wal/src/shared_wal.rs b/libsql-wal/src/shared_wal.rs index 09c107bf24..4c9f2cbc63 100644 --- a/libsql-wal/src/shared_wal.rs +++ b/libsql-wal/src/shared_wal.rs @@ -54,6 +54,7 @@ pub struct SharedWal { pub(crate) checkpoint_notifier: mpsc::Sender, /// maximum size the segment is allowed to grow pub(crate) max_segment_size: AtomicUsize, + pub(crate) io: Arc, } impl SharedWal { @@ -154,6 +155,8 @@ impl SharedWal { mut tx_id_lock: async_lock::MutexGuard>, mut reserved: MutexGuard>, ) -> Result> { + assert!(reserved.is_none() || *reserved == Some(read_tx.conn_id), "{}", dbg!(reserved.is_none()) || dbg!(*reserved == Some(read_tx.conn_id))); + assert!(tx_id_lock.is_none()); // we read two fields in the header. 
There is no risk that a transaction commit in // between the two reads because this would require that: // 1) there would be a running txn @@ -289,7 +292,7 @@ impl SharedWal { .current .load() .tail() - .checkpoint(&self.db_file, durable_frame_no, self.log_id()) + .checkpoint(&self.db_file, durable_frame_no, self.log_id(), &self.io) .await?; dbg!(checkpointed_frame_no); if let Some(checkpointed_frame_no) = checkpointed_frame_no { From dbc2f8457f86c29a89cedf6db95dca79e0eb9183 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Wed, 14 Aug 2024 19:48:41 +0200 Subject: [PATCH 48/50] fix deadlock --- libsql-server/src/hrana/http/request.rs | 1 + libsql-server/src/lib.rs | 3 +- libsql-wal/src/segment/current.rs | 6 ++++ libsql-wal/src/segment/sealed.rs | 1 + libsql-wal/src/shared_wal.rs | 42 +++++++++++-------------- libsql-wal/src/storage/job.rs | 6 ++++ libsql-wal/src/transaction.rs | 6 ++-- libsql-wal/tests/flaky_fs.rs | 8 +++++ libsql/src/hrana/mod.rs | 6 ++-- libsql/src/hrana/stream.rs | 16 ++++++++-- 10 files changed, 63 insertions(+), 32 deletions(-) diff --git a/libsql-server/src/hrana/http/request.rs b/libsql-server/src/hrana/http/request.rs index ac123ac978..533bdb956e 100644 --- a/libsql-server/src/hrana/http/request.rs +++ b/libsql-server/src/hrana/http/request.rs @@ -62,6 +62,7 @@ async fn try_handle( Ok(match request { proto::StreamRequest::None => bail!(ProtocolError::NoneStreamRequest), proto::StreamRequest::Close(_req) => { + dbg!(); stream_guard.close_db(); proto::StreamResponse::Close(proto::CloseStreamResp {}) } diff --git a/libsql-server/src/lib.rs b/libsql-server/src/lib.rs index 63a5617e7c..fec0c4937a 100644 --- a/libsql-server/src/lib.rs +++ b/libsql-server/src/lib.rs @@ -769,7 +769,6 @@ where ); builder.set_credentials_provider(Some(SharedCredentialsProvider::new(cred))); let config = builder.build(); - dbg!(&config); let backend = S3Backend::from_sdk_config( config, opt.bucket_name.clone(), @@ -791,7 +790,7 @@ where Either::B(NoStorage) }; - if 
dbg!(self.rpc_server_config.is_some()) && dbg!(matches!(storage, Either::B(_))) { + if self.rpc_server_config.is_some() && matches!(storage, Either::B(_)) { anyhow::bail!("replication without bottomless not supported yet"); } diff --git a/libsql-wal/src/segment/current.rs b/libsql-wal/src/segment/current.rs index 6ab829b515..60f7428a54 100644 --- a/libsql-wal/src/segment/current.rs +++ b/libsql-wal/src/segment/current.rs @@ -1022,6 +1022,12 @@ mod test { { f(&mut rand::thread_rng()) } + + fn remove_file_async(&self, path: &std::path::Path) -> impl std::future::Future> + Send { + async move { + std::fs::remove_file(path) + } + } } let tmp = Arc::new(tempdir().unwrap()); diff --git a/libsql-wal/src/segment/sealed.rs b/libsql-wal/src/segment/sealed.rs index ddc71f4168..fa0cc3ce40 100644 --- a/libsql-wal/src/segment/sealed.rs +++ b/libsql-wal/src/segment/sealed.rs @@ -210,6 +210,7 @@ impl SealedSegment { // This happens in case of crash: the segment is not empty, but it wasn't sealed. We need to // recover the index, and seal the segment. 
+ // FIXME: we have a bung here if !header.flags().contains(SegmentFlags::SEALED) { assert_eq!(header.index_offset.get(), 0); return Self::recover(file, path, header).map(Some); diff --git a/libsql-wal/src/shared_wal.rs b/libsql-wal/src/shared_wal.rs index 4c9f2cbc63..61c611f6b0 100644 --- a/libsql-wal/src/shared_wal.rs +++ b/libsql-wal/src/shared_wal.rs @@ -109,7 +109,6 @@ impl SharedWal { match tx { Transaction::Write(_) => unreachable!("already in a write transaction"), Transaction::Read(read_tx) => { - { let mut reserved = self.wal_lock.reserved.lock(); match *reserved { // we have already reserved the slot, go ahead and try to acquire @@ -117,33 +116,31 @@ impl SharedWal { tracing::trace!("taking reserved slot"); reserved.take(); let lock = self.wal_lock.tx_id.lock_blocking(); + assert!(lock.is_none()); let write_tx = self.acquire_write(read_tx, lock, reserved)?; *tx = Transaction::Write(write_tx); return Ok(()); } + None => { + let lock = self.wal_lock.tx_id.lock_blocking(); + if lock.is_none() && self.wal_lock.waiters.is_empty() { + let write_tx = self.acquire_write(read_tx, lock, reserved)?; + *tx = Transaction::Write(write_tx); + return Ok(()); + } + } _ => (), } - } - let lock = self.wal_lock.tx_id.lock_blocking(); - match *lock { - None if self.wal_lock.waiters.is_empty() => { - let write_tx = - self.acquire_write(read_tx, lock, self.wal_lock.reserved.lock())?; - *tx = Transaction::Write(write_tx); - return Ok(()); - } - Some(_) | None => { - tracing::trace!( - "txn currently held by another connection, registering to wait queue" - ); - let parker = crossbeam::sync::Parker::new(); - let unparker = parker.unparker().clone(); - self.wal_lock.waiters.push((unparker, read_tx.conn_id)); - drop(lock); - parker.park(); - } - } + tracing::trace!( + "txn currently held by another connection, registering to wait queue" + ); + + let parker = crossbeam::sync::Parker::new(); + let unparker = parker.unparker().clone(); + self.wal_lock.waiters.push((unparker, 
read_tx.conn_id)); + drop(reserved); + parker.park(); } } } @@ -155,7 +152,7 @@ impl SharedWal { mut tx_id_lock: async_lock::MutexGuard>, mut reserved: MutexGuard>, ) -> Result> { - assert!(reserved.is_none() || *reserved == Some(read_tx.conn_id), "{}", dbg!(reserved.is_none()) || dbg!(*reserved == Some(read_tx.conn_id))); + assert!(reserved.is_none() || *reserved == Some(read_tx.conn_id)); assert!(tx_id_lock.is_none()); // we read two fields in the header. There is no risk that a transaction commit in // between the two reads because this would require that: @@ -294,7 +291,6 @@ impl SharedWal { .tail() .checkpoint(&self.db_file, durable_frame_no, self.log_id(), &self.io) .await?; - dbg!(checkpointed_frame_no); if let Some(checkpointed_frame_no) = checkpointed_frame_no { self.checkpointed_frame_no .store(checkpointed_frame_no, Ordering::SeqCst); diff --git a/libsql-wal/src/storage/job.rs b/libsql-wal/src/storage/job.rs index bba4025224..42eaba9256 100644 --- a/libsql-wal/src/storage/job.rs +++ b/libsql-wal/src/storage/job.rs @@ -416,6 +416,12 @@ mod test { { todo!() } + + fn destroy(&self, _io: &IO) -> impl std::future::Future { + async move { + todo!() + } + } } struct TestBackend; diff --git a/libsql-wal/src/transaction.rs b/libsql-wal/src/transaction.rs index f2cdd5be70..33ff9fcea1 100644 --- a/libsql-wal/src/transaction.rs +++ b/libsql-wal/src/transaction.rs @@ -276,6 +276,8 @@ impl WriteTransaction { let Self { wal_lock, read_tx, .. 
} = self; + // always acquire lock in this order: reserved, then tx_id + let mut reserved = wal_lock.reserved.lock(); let mut lock = wal_lock.tx_id.lock_blocking(); match *lock { Some(lock_id) if lock_id == read_tx.id => { @@ -284,7 +286,7 @@ impl WriteTransaction { _ => (), } - if let Some(id) = *wal_lock.reserved.lock() { + if let Some(id) = *reserved { tracing::trace!("tx already reserved by {id}"); return read_tx; } @@ -297,7 +299,7 @@ impl WriteTransaction { } crossbeam::deque::Steal::Success((unparker, id)) => { tracing::trace!("waking up {id}"); - wal_lock.reserved.lock().replace(id); + reserved.replace(id); unparker.unpark(); break; } diff --git a/libsql-wal/tests/flaky_fs.rs b/libsql-wal/tests/flaky_fs.rs index cde3c3419b..701671f03b 100644 --- a/libsql-wal/tests/flaky_fs.rs +++ b/libsql-wal/tests/flaky_fs.rs @@ -154,6 +154,14 @@ impl Io for FlakyIo { { f(&mut self.rng.lock()) } + + fn remove_file_async(&self, path: &Path) -> impl std::future::Future> + Send { + async move { + self.with_random_failure(|| { + std::fs::remove_file(path) + }) + } + } } macro_rules! assert_not_corrupt { diff --git a/libsql/src/hrana/mod.rs b/libsql/src/hrana/mod.rs index 4a6fd0c63a..2bfd90c00d 100644 --- a/libsql/src/hrana/mod.rs +++ b/libsql/src/hrana/mod.rs @@ -36,9 +36,9 @@ struct Cookie { base_url: Option, } -pub trait HttpSend: Clone { - type Stream: Stream> + Unpin; - type Result: Future>; +pub trait HttpSend: Clone + Send + 'static { + type Stream: Stream> + Unpin + Send; + type Result: Future> + Send; fn http_send(&self, url: Arc, auth: Arc, body: String) -> Self::Result; /// Schedule sending a HTTP post request without waiting for the completion. 
diff --git a/libsql/src/hrana/stream.rs b/libsql/src/hrana/stream.rs index b27e48145e..c63cc2fcb7 100644 --- a/libsql/src/hrana/stream.rs +++ b/libsql/src/hrana/stream.rs @@ -68,7 +68,7 @@ where auth_token, sql_id_generator: 0, baton: None, - }), + }).into(), }), } } @@ -287,9 +287,10 @@ where total_changes: AtomicU64, last_insert_rowid: AtomicI64, is_autocommit: AtomicBool, - stream: Mutex>, + stream: Arc>>, } + #[derive(Debug)] struct RawStream where @@ -401,6 +402,15 @@ where Ok(responses) } + async fn close_stream(&mut self) -> Result<()> { + self + .send_requests([ + StreamRequest::Close(CloseStreamReq {}), + ]) + .await?; + Ok(()) + } + async fn finalize(&mut self, req: StreamRequest) -> Result<(StreamResponse, bool)> { let [resp, get_autocommit, _] = self .send_requests([ @@ -441,6 +451,7 @@ where T: HttpSend, { fn drop(&mut self) { + dbg!(); if let Some(baton) = self.baton.take() { // only send a close request if stream was ever used to send the data tracing::trace!("closing client stream (baton: `{}`)", baton); @@ -449,6 +460,7 @@ where requests: vec![StreamRequest::Close(CloseStreamReq {})], }) .unwrap(); + dbg!(); self.client .clone() .oneshot(self.pipeline_url.clone(), self.auth_token.clone(), req); From 367a3242ac7b61dbf653492ef3478b6f12b927cf Mon Sep 17 00:00:00 2001 From: ad hoc Date: Fri, 16 Aug 2024 11:40:58 +0200 Subject: [PATCH 49/50] fix shutdown --- libsql-server/src/hrana/http/request.rs | 1 - libsql-server/src/http/user/mod.rs | 18 +- libsql-server/src/lib.rs | 244 +++++++++--------------- libsql-wal/src/checkpointer.rs | 3 + libsql-wal/src/error.rs | 2 + libsql-wal/src/registry.rs | 11 +- libsql-wal/src/shared_wal.rs | 15 +- libsql-wal/src/storage/async_storage.rs | 21 ++ libsql-wal/src/storage/backend/s3.rs | 2 + libsql-wal/src/storage/mod.rs | 9 + libsql/src/hrana/stream.rs | 2 - 11 files changed, 161 insertions(+), 167 deletions(-) diff --git a/libsql-server/src/hrana/http/request.rs b/libsql-server/src/hrana/http/request.rs index 
533bdb956e..ac123ac978 100644 --- a/libsql-server/src/hrana/http/request.rs +++ b/libsql-server/src/hrana/http/request.rs @@ -62,7 +62,6 @@ async fn try_handle( Ok(match request { proto::StreamRequest::None => bail!(ProtocolError::NoneStreamRequest), proto::StreamRequest::Close(_req) => { - dbg!(); stream_guard.close_db(); proto::StreamResponse::Close(proto::CloseStreamResp {}) } diff --git a/libsql-server/src/http/user/mod.rs b/libsql-server/src/http/user/mod.rs index 17ca0e2ee9..d7f5b30209 100644 --- a/libsql-server/src/http/user/mod.rs +++ b/libsql-server/src/http/user/mod.rs @@ -26,8 +26,7 @@ use libsql_replication::rpc::replication::replication_log_server::{ReplicationLo use serde::de::DeserializeOwned; use serde::Serialize; use serde_json::Number; -use tokio::sync::{mpsc, oneshot, Notify}; -use tokio::task::JoinSet; +use tokio::sync::{mpsc, oneshot}; use tonic::transport::Server; use tower_http::compression::predicate::NotForContentType; @@ -37,7 +36,7 @@ use tower_http::{compression::CompressionLayer, cors}; use crate::auth::{Auth, AuthError, Authenticated, Jwt, Permission, UserAuthContext}; use crate::connection::{Connection, RequestContext}; use crate::error::Error; -use crate::hrana; +use crate::{hrana, TaskManager}; use crate::http::user::db_factory::MakeConnectionExtractorPath; use crate::http::user::timing::timings_middleware; use crate::http::user::types::HttpQuery; @@ -255,7 +254,6 @@ pub struct UserApi { pub enable_console: bool, pub self_url: Option, pub primary_url: Option, - pub shutdown: Arc, } impl UserApi @@ -264,12 +262,12 @@ where P: Proxy, S: ReplicationLog, { - pub fn configure(self, join_set: &mut JoinSet>) -> Arc { + pub fn configure(self, task_manager: &mut TaskManager) -> Arc { let (hrana_accept_tx, hrana_accept_rx) = mpsc::channel(8); let (hrana_upgrade_tx, hrana_upgrade_rx) = mpsc::channel(8); let hrana_http_srv = Arc::new(hrana::http::Server::new(self.self_url.clone())); - join_set.spawn({ + task_manager.spawn_until_shutdown({ let 
namespaces = self.namespaces.clone(); let user_auth_strategy = self.user_auth_strategy.clone(); let idle_kicker = self @@ -295,7 +293,7 @@ where } }); - join_set.spawn({ + task_manager.spawn_until_shutdown({ let server = hrana_http_srv.clone(); async move { server.run_expire().await; @@ -304,7 +302,7 @@ where }); if let Some(acceptor) = self.hrana_ws_acceptor { - join_set.spawn(async move { + task_manager.spawn_until_shutdown(async move { hrana::ws::listen(acceptor, hrana_accept_tx).await; Ok(()) }); @@ -445,10 +443,10 @@ where let router = router.fallback(handle_fallback); let h2c = crate::h2c::H2cMaker::new(router); - join_set.spawn(async move { + task_manager.spawn_with_shutdown_notify(|shutdown| async move { hyper::server::Server::builder(acceptor) .serve(h2c) - .with_graceful_shutdown(self.shutdown.notified()) + .with_graceful_shutdown(shutdown.notified()) .await .context("http server")?; Ok(()) diff --git a/libsql-server/src/lib.rs b/libsql-server/src/lib.rs index fec0c4937a..71320fc486 100644 --- a/libsql-server/src/lib.rs +++ b/libsql-server/src/lib.rs @@ -32,7 +32,7 @@ use aws_smithy_runtime::client::http::hyper_014::HyperClientBuilder; use config::{ AdminApiConfig, DbConfig, HeartbeatConfig, RpcClientConfig, RpcServerConfig, UserApiConfig, }; -use futures::future::{pending, ready}; +use futures::future::ready; use futures::Future; use http::user::UserApi; use hyper::client::HttpConnector; @@ -208,9 +208,77 @@ struct Services { disable_default_namespace: bool, db_config: DbConfig, user_auth_strategy: Auth, +} + +struct TaskManager { + join_set: JoinSet>, shutdown: Arc, } +impl TaskManager { + /// pass a shutdown notifier to the task. 
The task must shutdown upon receiving a signal + pub fn spawn_with_shutdown_notify(&mut self, f: F) + where F: FnOnce(Arc) -> Fut, + Fut: Future> + Send + 'static, + { + let fut = f(self.shutdown.clone()); + self.join_set.spawn(fut); + } + + pub fn spawn_until_shutdown(&mut self, fut: F) + where + F: Future> + Send + 'static, + { + self.spawn_until_shutdown_with_teardown(fut, ready(Ok(()))) + } + + /// run the passed future until shutdown is called, then call the passed teardown future + #[track_caller] + pub fn spawn_until_shutdown_with_teardown( + &mut self, + fut: F, + teardown: T, + ) where + F: Future> + Send + 'static, + T: Future> + Send + 'static, + { + let shutdown = self.shutdown.clone(); + self.join_set.spawn(async move { + tokio::select! { + _ = shutdown.notified() => { + let ret = teardown.await; + if let Err(ref e) = ret { + let caller = std::panic::Location::caller(); + tracing::error!(caller = caller.to_string(), "task teardown returned an error: {e}"); + } + ret + }, + ret = fut => ret + } + }); + } + + fn new() -> Self { + Self { join_set: JoinSet::new(), shutdown: Arc::new(Notify::new()) } + } + + pub async fn shutdown(&mut self) -> anyhow::Result<()> { + self.shutdown.notify_waiters(); + while let Some(ret) = self.join_set.join_next().await { + ret?? 
+ } + + Ok(()) + } + + pub async fn join_next(&mut self) -> anyhow::Result<()> { + if let Some(ret) = self.join_set.join_next().await { + ret??; + } + Ok(()) + } +} + impl Services where A: crate::net::Accept, @@ -218,7 +286,7 @@ where S: ReplicationLog, C: Connector, { - fn configure(self, join_set: &mut JoinSet>) { + fn configure(self, task_manager: &mut TaskManager) { let user_http = UserApi { http_acceptor: self.user_api_config.http_acceptor, hrana_ws_acceptor: self.user_api_config.hrana_ws_acceptor, @@ -233,10 +301,9 @@ where enable_console: self.user_api_config.enable_http_console, self_url: self.user_api_config.self_url, primary_url: self.user_api_config.primary_url, - shutdown: self.shutdown.clone(), }; - let user_http_service = user_http.configure(join_set); + let user_http_service = user_http.configure(task_manager); if let Some(AdminApiConfig { acceptor, @@ -244,8 +311,7 @@ where disable_metrics, }) = self.admin_api_config { - let shutdown = self.shutdown.clone(); - join_set.spawn(http::admin::run( + task_manager.spawn_with_shutdown_notify(|shutdown| http::admin::run( acceptor, user_http_service, self.namespace_store, @@ -357,7 +423,7 @@ where fn spawn_monitoring_tasks( &self, - join_set: &mut JoinSet>, + task_manager: &mut TaskManager, stats_receiver: mpsc::Receiver<(NamespaceName, MetaStoreHandle, Weak)>, ) -> anyhow::Result<()> { match self.heartbeat_config { @@ -368,7 +434,7 @@ where config.heartbeat_period, ); - self.spawn_until_shutdown_on(join_set, { + task_manager.spawn_until_shutdown({ let heartbeat_auth = config.heartbeat_auth.clone(); let heartbeat_period = config.heartbeat_period; let heartbeat_url = if let Some(url) = &config.heartbeat_url { @@ -405,7 +471,6 @@ where proxy_service: P, replication_service: L, user_auth_strategy: Auth, - shutdown: Arc, ) -> Services { Services { namespace_store, @@ -418,13 +483,12 @@ where disable_default_namespace: self.disable_default_namespace, db_config: self.db_config, user_auth_strategy, - shutdown, } } 
pub async fn start(mut self) -> anyhow::Result<()> { static INIT: std::sync::Once = std::sync::Once::new(); - let mut join_set = JoinSet::new(); + let mut task_manager = TaskManager::new(); if std::env::var("LIBSQL_SQLITE_MIMALLOC").is_ok() { setup_sqlite_alloc(); @@ -460,7 +524,7 @@ where let (scripted_backup, script_backup_task) = ScriptBackupManager::new(&self.path, CommandHandler::new(command.to_string())) .await?; - self.spawn_until_shutdown_on(&mut join_set, script_backup_task.run()); + task_manager.spawn_until_shutdown(script_backup_task.run()); Some(scripted_backup) } None => None, @@ -490,7 +554,7 @@ where .make_configurators_and_replication_svc( base_config, client_config.clone(), - &mut join_set, + &mut task_manager, scheduler_sender.into(), scripted_backup, ) @@ -518,7 +582,7 @@ where ) .await?; - self.spawn_monitoring_tasks(&mut join_set, stats_receiver)?; + self.spawn_monitoring_tasks(&mut task_manager, stats_receiver)?; // if namespaces are enabled, then bottomless must have set DB ID if !self.disable_namespaces { @@ -534,7 +598,7 @@ where let proxy_service = ProxyService::new(namespace_store.clone(), None, self.disable_namespaces); // Garbage collect proxy clients every 30 seconds - self.spawn_until_shutdown_on(&mut join_set, { + task_manager.spawn_until_shutdown({ let clients = proxy_service.clients(); async move { loop { @@ -551,8 +615,7 @@ where false, ); - self.spawn_until_shutdown_on( - &mut join_set, + task_manager.spawn_until_shutdown( run_rpc_server( proxy_service, config.acceptor, @@ -572,7 +635,7 @@ where // The migration scheduler is only useful on the primary let meta_conn = metastore_conn_maker()?; let scheduler = Scheduler::new(namespace_store.clone(), meta_conn).await?; - self.spawn_until_shutdown_on(&mut join_set, async move { + task_manager.spawn_until_shutdown(async move { scheduler.run(scheduler_receiver).await; Ok(()) }); @@ -602,7 +665,7 @@ where ); // Garbage collect proxy clients every 30 seconds - 
self.spawn_until_shutdown_on(&mut join_set, { + task_manager.spawn_until_shutdown({ let clients = proxy_svc.clients(); async move { loop { @@ -618,9 +681,8 @@ where proxy_svc, replication_svc, user_auth_strategy.clone(), - service_shutdown.clone(), ) - .configure(&mut join_set); + .configure(&mut task_manager); } DatabaseKind::Replica => { let (channel, uri) = client_config.clone().unwrap(); @@ -639,16 +701,16 @@ where proxy_svc, replication_svc, user_auth_strategy, - service_shutdown.clone(), ) - .configure(&mut join_set); + .configure(&mut task_manager); } }; tokio::select! { _ = shutdown.notified() => { let shutdown = async { - join_set.shutdown().await; + task_manager.shutdown().await?; + // join_set.shutdown().await; service_shutdown.notify_waiters(); namespace_store.shutdown().await?; @@ -670,8 +732,8 @@ where } } - Some(res) = join_set.join_next() => { - res??; + res = task_manager.join_next() => { + res?; }, else => (), } @@ -683,7 +745,7 @@ where &self, base_config: BaseNamespaceConfig, client_config: Option<(Channel, Uri)>, - join_set: &mut JoinSet>, + task_manager: &mut TaskManager, migration_scheduler_handle: SchedulerHandle, scripted_backup: Option, ) -> anyhow::Result<(NamespaceConfigurators, MakeReplicationSvc)> { @@ -712,7 +774,7 @@ where Some(CustomWAL::LibsqlWal) => self.libsql_wal_configurators( base_config, client_config, - join_set, + task_manager, migration_scheduler_handle, scripted_backup, wal_path, @@ -740,7 +802,7 @@ where &self, base_config: BaseNamespaceConfig, client_config: Option<(Channel, Uri)>, - join_set: &mut JoinSet>, + task_manager: &mut TaskManager, migration_scheduler_handle: SchedulerHandle, scripted_backup: Option, wal_path: PathBuf, @@ -780,7 +842,7 @@ where }; let (storage, storage_loop) = AsyncStorage::new(config).await; - join_set.spawn(async move { + task_manager.spawn_with_shutdown_notify(|_| async move { storage_loop.run().await; Ok(()) }); @@ -796,7 +858,7 @@ where let registry = Arc::new(WalRegistry::new(wal_path, 
storage, sender)?); let checkpointer = LibsqlCheckpointer::new(registry.clone(), receiver, 8); - self.spawn_until_shutdown_on(join_set, async move { + task_manager.spawn_with_shutdown_notify(|_| async move { checkpointer.run().await; Ok(()) }); @@ -815,9 +877,10 @@ where .into() }); - self.spawn_until_shutdown_with_teardown(join_set, pending(), { + task_manager.spawn_with_shutdown_notify(|shutdown| { let registry = registry.clone(); async move { + shutdown.notified().await; registry.shutdown().await?; Ok(()) } @@ -934,34 +997,6 @@ where Ok((configurators, make_replication_svc)) } - fn spawn_until_shutdown_on(&self, join_set: &mut JoinSet>, fut: F) - where - F: Future> + Send + 'static, - { - self.spawn_until_shutdown_with_teardown(join_set, fut, ready(Ok(()))) - } - - /// run the passed future until shutdown is called, then call the passed teardown future - fn spawn_until_shutdown_with_teardown( - &self, - join_set: &mut JoinSet>, - fut: F, - teardown: T, - ) where - F: Future> + Send + 'static, - T: Future> + Send + 'static, - { - let shutdown = self.shutdown.clone(); - join_set.spawn(async move { - tokio::select! { - _ = shutdown.notified() => { - teardown.await - }, - ret = fut => ret - } - }); - } - async fn legacy_configurators( &self, base_config: BaseNamespaceConfig, @@ -1063,95 +1098,6 @@ where }) } - // fn configure_wal_manager( - // &self, - // join_set: &mut JoinSet>, - // ) -> anyhow::Result<( - // Arc InnerWalManager + Sync + Send + 'static>, - // Pin> + Send + Sync + 'static>>, - // )> { - // let wal_path = self.path.join("wals"); - // let enable_libsql_wal_test = { - // let is_primary = self.rpc_server_config.is_some(); - // let is_libsql_wal_test = std::env::var("LIBSQL_WAL_TEST").is_ok(); - // is_primary && is_libsql_wal_test - // }; - // let use_libsql_wal = - // self.use_custom_wal == Some(CustomWAL::LibsqlWal) || enable_libsql_wal_test; - // if !use_libsql_wal { - // if wal_path.try_exists()? 
{ - // anyhow::bail!("database was previously setup to use libsql-wal"); - // } - // } - // - // if self.use_custom_wal.is_some() { - // if self.db_config.bottomless_replication.is_some() { - // anyhow::bail!("bottomless not supported with custom WAL"); - // } - // if self.rpc_client_config.is_some() { - // anyhow::bail!("custom WAL not supported in replica mode"); - // } - // } - // - // let namespace_resolver = |path: &Path| { - // NamespaceName::from_string( - // path.parent() - // .unwrap() - // .file_name() - // .unwrap() - // .to_str() - // .unwrap() - // .to_string(), - // ) - // .unwrap() - // .into() - // }; - // - // match self.use_custom_wal { - // Some(CustomWAL::LibsqlWal) => { - // let (sender, receiver) = tokio::sync::mpsc::channel(64); - // let registry = Arc::new(WalRegistry::new(wal_path, SqldStorage, sender)?); - // let checkpointer = LibsqlCheckpointer::new(registry.clone(), receiver, 8); - // join_set.spawn(async move { - // checkpointer.run().await; - // Ok(()) - // }); - // - // let wal = LibsqlWalManager::new(registry.clone(), Arc::new(namespace_resolver)); - // let shutdown_notify = self.shutdown.clone(); - // let shutdown_fut = Box::pin(async move { - // shutdown_notify.notified().await; - // registry.shutdown().await?; - // Ok(()) - // }); - // - // tracing::info!("using libsql wal"); - // Ok((Arc::new(move || EitherWAL::B(wal.clone())), shutdown_fut)) - // } - // #[cfg(feature = "durable-wal")] - // Some(CustomWAL::DurableWal) => { - // tracing::info!("using durable wal"); - // let lock_manager = Arc::new(std::sync::Mutex::new(LockManager::new())); - // let wal = DurableWalManager::new( - // lock_manager, - // namespace_resolver, - // self.storage_server_address.clone(), - // ); - // Ok(( - // Arc::new(move || EitherWAL::C(wal.clone())), - // Box::pin(ready(Ok(()))), - // )) - // } - // None => { - // tracing::info!("using sqlite3 wal"); - // Ok(( - // Arc::new(|| EitherWAL::A(Sqlite3WalManager::default())), - // Box::pin(ready(Ok(()))), 
- // )) - // } - // } - // } - async fn get_client_config(&self) -> anyhow::Result> { match self.rpc_client_config { Some(ref config) => Ok(Some(config.configure().await?)), diff --git a/libsql-wal/src/checkpointer.rs b/libsql-wal/src/checkpointer.rs index 96a7b83bd7..869d8e3545 100644 --- a/libsql-wal/src/checkpointer.rs +++ b/libsql-wal/src/checkpointer.rs @@ -121,6 +121,7 @@ where pub async fn run(mut self) { loop { if self.should_exit() { + dbg!(); tracing::info!("checkpointer exited cleanly."); return; } @@ -135,6 +136,7 @@ where fn should_exit(&self) -> bool { self.shutting_down + && self.recv.is_empty() && self.scheduled.is_empty() && self.checkpointing.is_empty() && self.join_set.is_empty() @@ -162,6 +164,7 @@ where self.scheduled.insert(namespace); } None | Some(CheckpointMessage::Shutdown) => { + tracing::info!("checkpointed is shutting down. {} namespaces to checkpoint", self.checkpointing.len()); self.shutting_down = true; } } diff --git a/libsql-wal/src/error.rs b/libsql-wal/src/error.rs index 003c4b4062..92bd51504a 100644 --- a/libsql-wal/src/error.rs +++ b/libsql-wal/src/error.rs @@ -17,6 +17,8 @@ pub enum Error { InvalidHeaderVersion, #[error("Invalid page size, only 4095 is supported")] InvalidPageSize, + #[error("Registry is shutting down")] + ShuttingDown, } impl Into for Error { diff --git a/libsql-wal/src/registry.rs b/libsql-wal/src/registry.rs index 20217a0005..290fdf196e 100644 --- a/libsql-wal/src/registry.rs +++ b/libsql-wal/src/registry.rs @@ -170,7 +170,7 @@ where namespace: &NamespaceName, ) -> Result>> { if self.shutdown.load(Ordering::SeqCst) { - todo!("open after shutdown"); + return Err(crate::error::Error::ShuttingDown) } loop { @@ -350,6 +350,7 @@ where // On shutdown, we checkpoint all the WALs. 
This require sealing the current segment, and when // checkpointing all the segments pub async fn shutdown(self: Arc) -> Result<()> { + tracing::info!("shutting down registry"); self.shutdown.store(true, Ordering::SeqCst); let mut join_set = JoinSet::>::new(); @@ -388,7 +389,12 @@ where } while join_set.join_next().await.is_some() {} + dbg!(); + // we process any pending storage job, then checkpoint everything + self.storage.shutdown().await; + + dbg!(); // wait for checkpointer to exit let _ = self .checkpoint_notifier @@ -396,8 +402,7 @@ where .await; self.checkpoint_notifier.closed().await; - // todo: shutdown storage - // self.storage.shutdown().await; + tracing::info!("registry shutdown gracefully"); Ok(()) } diff --git a/libsql-wal/src/shared_wal.rs b/libsql-wal/src/shared_wal.rs index 61c611f6b0..6661c3a2a1 100644 --- a/libsql-wal/src/shared_wal.rs +++ b/libsql-wal/src/shared_wal.rs @@ -58,10 +58,20 @@ pub struct SharedWal { } impl SharedWal { + #[tracing::instrument(skip(self), fields(namespace = self.namespace.as_str()))] pub fn shutdown(&self) -> Result<()> { + tracing::info!("started namespace shutdown"); self.shutdown.store(true, Ordering::SeqCst); - let mut tx = Transaction::Read(self.begin_read(u64::MAX)); - self.upgrade(&mut tx)?; + // fixme: for infinite loop + let mut tx = loop { + let mut tx = Transaction::Read(self.begin_read(u64::MAX)); + match self.upgrade(&mut tx) { + Ok(_) => break tx, + Err(Error::BusySnapshot) => continue, + Err(e) => return Err(e), + } + }; + { let mut tx = tx.as_write_mut().unwrap().lock(); tx.commit(); @@ -70,6 +80,7 @@ impl SharedWal { // The current segment will not be used anymore. It's empty, but we still seal it so that // the next startup doesn't find an unsealed segment. 
self.current.load().seal()?; + tracing::info!("namespace shutdown"); Ok(()) } diff --git a/libsql-wal/src/storage/async_storage.rs b/libsql-wal/src/storage/async_storage.rs index 24347655ac..7b6e37f29f 100644 --- a/libsql-wal/src/storage/async_storage.rs +++ b/libsql-wal/src/storage/async_storage.rs @@ -49,6 +49,7 @@ where pub async fn run(mut self) { let mut shutting_down = false; let mut in_flight_futs = JoinSet::new(); + let mut notify_shutdown = None; // run the loop until shutdown. loop { if shutting_down && self.scheduler.is_empty() { @@ -91,6 +92,12 @@ where Some(StorageLoopMessage::DurableFrameNoReq { namespace, ret, config_override }) => { self.fetch_durable_frame_no_async(namespace, ret, config_override); } + Some(StorageLoopMessage::Shutdown(ret)) => { + dbg!(); + notify_shutdown.replace(ret); + shutting_down = true; + tracing::info!("Storage shutting down"); + } None => { shutting_down = true; } @@ -107,6 +114,11 @@ where } } } + + tracing::info!("Storage shutdown"); + if let Some(notify) = notify_shutdown { + let _ = notify.send(()); + } } fn fetch_durable_frame_no_async( @@ -146,6 +158,7 @@ enum StorageLoopMessage { config_override: Option, ret: oneshot::Sender>, }, + Shutdown(oneshot::Sender<()>), } pub struct AsyncStorage { @@ -163,6 +176,13 @@ where type Segment = S; type Config = B::Config; + async fn shutdown(&self) { + dbg!(); + let (snd, rcv) = oneshot::channel(); + let _ = self.job_sender.send(StorageLoopMessage::Shutdown(snd)); + let _ = rcv.await; + } + fn store( &self, namespace: &NamespaceName, @@ -258,6 +278,7 @@ where let segment = CompactedSegment::open(file).await?; Ok(segment) } + } pub struct AsyncStorageInitConfig { diff --git a/libsql-wal/src/storage/backend/s3.rs b/libsql-wal/src/storage/backend/s3.rs index 811c84178b..b81dd29ca8 100644 --- a/libsql-wal/src/storage/backend/s3.rs +++ b/libsql-wal/src/storage/backend/s3.rs @@ -265,6 +265,8 @@ impl S3Backend { } let next_frame_no = header.start_frame_no.get() - 1; + 
dbg!(next_frame_no); + dbg!((seen.len(), db_size)); let Some(key) = self .find_segment_inner(config, &folder_key, next_frame_no) .await? diff --git a/libsql-wal/src/storage/mod.rs b/libsql-wal/src/storage/mod.rs index 757c672ce6..45edff1d12 100644 --- a/libsql-wal/src/storage/mod.rs +++ b/libsql-wal/src/storage/mod.rs @@ -185,6 +185,8 @@ pub trait Storage: Send + Sync + 'static { key: &SegmentKey, config_override: Option, ) -> impl Future>> + Send; + + fn shutdown(&self) -> impl Future + Send { async { dbg!(()) } } } /// special zip function for Either storage implementation @@ -304,6 +306,13 @@ where A: Storage, } } } + + async fn shutdown(&self) { + match self { + Either::A(a) => a.shutdown().await, + Either::B(b) => b.shutdown().await, + } + } } /// a placeholder storage that doesn't store segment diff --git a/libsql/src/hrana/stream.rs b/libsql/src/hrana/stream.rs index c63cc2fcb7..23f2bcc220 100644 --- a/libsql/src/hrana/stream.rs +++ b/libsql/src/hrana/stream.rs @@ -451,7 +451,6 @@ where T: HttpSend, { fn drop(&mut self) { - dbg!(); if let Some(baton) = self.baton.take() { // only send a close request if stream was ever used to send the data tracing::trace!("closing client stream (baton: `{}`)", baton); @@ -460,7 +459,6 @@ where requests: vec![StreamRequest::Close(CloseStreamReq {})], }) .unwrap(); - dbg!(); self.client .clone() .oneshot(self.pipeline_url.clone(), self.auth_token.clone(), req); From 22d439163ebe9975578df685ff5aeb6fa4a79c2a Mon Sep 17 00:00:00 2001 From: ad hoc Date: Fri, 16 Aug 2024 13:32:13 +0200 Subject: [PATCH 50/50] fix truncate bug --- libsql-server/src/namespace/configurator/helpers.rs | 10 +++++----- libsql-wal/src/checkpointer.rs | 1 - libsql-wal/src/segment/list.rs | 5 ++--- libsql-wal/src/storage/async_storage.rs | 2 -- libsql-wal/src/storage/backend/s3.rs | 2 -- libsql-wal/src/storage/mod.rs | 2 +- libsql-wal/src/wal.rs | 2 +- 7 files changed, 9 insertions(+), 15 deletions(-) diff --git 
a/libsql-server/src/namespace/configurator/helpers.rs b/libsql-server/src/namespace/configurator/helpers.rs index a1102d0ea8..558ccee7ca 100644 --- a/libsql-server/src/namespace/configurator/helpers.rs +++ b/libsql-server/src/namespace/configurator/helpers.rs @@ -362,11 +362,11 @@ pub(super) async fn make_stats( } }); - join_set.spawn(run_storage_monitor( - db_path.into(), - Arc::downgrade(&stats), - encryption_config, - )); + // join_set.spawn(run_storage_monitor( + // db_path.into(), + // Arc::downgrade(&stats), + // encryption_config, + // )); tracing::debug!("done sending stats, and creating bg tasks"); diff --git a/libsql-wal/src/checkpointer.rs b/libsql-wal/src/checkpointer.rs index 869d8e3545..049ceea6f3 100644 --- a/libsql-wal/src/checkpointer.rs +++ b/libsql-wal/src/checkpointer.rs @@ -121,7 +121,6 @@ where pub async fn run(mut self) { loop { if self.should_exit() { - dbg!(); tracing::info!("checkpointer exited cleanly."); return; } diff --git a/libsql-wal/src/segment/list.rs b/libsql-wal/src/segment/list.rs index 330a31e07d..0c5e66df1a 100644 --- a/libsql-wal/src/segment/list.rs +++ b/libsql-wal/src/segment/list.rs @@ -161,6 +161,8 @@ where log_id: log_id.as_u128().into(), }; + db_file.set_len(size_after as u64 * LIBSQL_PAGE_SIZE as u64)?; + let footer_offset = size_after as usize * LIBSQL_PAGE_SIZE as usize; let (_, ret) = db_file .write_all_at_async(ZeroCopyBuf::new_init(footer), footer_offset as u64) @@ -168,7 +170,6 @@ where ret?; // todo: truncate if necessary - //// TODO: make async db_file.sync_all()?; @@ -196,8 +197,6 @@ where self.len.fetch_sub(segs.len(), Ordering::Relaxed); - db_file.set_len(size_after as u64 * 4096)?; - Ok(Some(last_replication_index)) } diff --git a/libsql-wal/src/storage/async_storage.rs b/libsql-wal/src/storage/async_storage.rs index 7b6e37f29f..6b3f4d06a3 100644 --- a/libsql-wal/src/storage/async_storage.rs +++ b/libsql-wal/src/storage/async_storage.rs @@ -93,7 +93,6 @@ where self.fetch_durable_frame_no_async(namespace, 
ret, config_override); } Some(StorageLoopMessage::Shutdown(ret)) => { - dbg!(); notify_shutdown.replace(ret); shutting_down = true; tracing::info!("Storage shutting down"); @@ -177,7 +176,6 @@ where type Config = B::Config; async fn shutdown(&self) { - dbg!(); let (snd, rcv) = oneshot::channel(); let _ = self.job_sender.send(StorageLoopMessage::Shutdown(snd)); let _ = rcv.await; diff --git a/libsql-wal/src/storage/backend/s3.rs b/libsql-wal/src/storage/backend/s3.rs index b81dd29ca8..811c84178b 100644 --- a/libsql-wal/src/storage/backend/s3.rs +++ b/libsql-wal/src/storage/backend/s3.rs @@ -265,8 +265,6 @@ impl S3Backend { } let next_frame_no = header.start_frame_no.get() - 1; - dbg!(next_frame_no); - dbg!((seen.len(), db_size)); let Some(key) = self .find_segment_inner(config, &folder_key, next_frame_no) .await? diff --git a/libsql-wal/src/storage/mod.rs b/libsql-wal/src/storage/mod.rs index 45edff1d12..fdfe2e15e8 100644 --- a/libsql-wal/src/storage/mod.rs +++ b/libsql-wal/src/storage/mod.rs @@ -186,7 +186,7 @@ pub trait Storage: Send + Sync + 'static { config_override: Option, ) -> impl Future>> + Send; - fn shutdown(&self) -> impl Future + Send { async { dbg!(()) } } + fn shutdown(&self) -> impl Future + Send { async { () } } } /// special zip function for Either storage implementation diff --git a/libsql-wal/src/wal.rs b/libsql-wal/src/wal.rs index 4cbf81b10e..079a7127b9 100644 --- a/libsql-wal/src/wal.rs +++ b/libsql-wal/src/wal.rs @@ -6,7 +6,7 @@ use std::sync::Arc; use libsql_sys::name::NamespaceResolver; use libsql_sys::wal::{Wal, WalManager}; -use crate::io::Io; +use crate::io::{FileExt as _, Io}; use crate::registry::WalRegistry; use crate::segment::sealed::SealedSegment; use crate::shared_wal::SharedWal;