diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..9ba3f68 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,51 @@ +## Verify Changes + +After making code changes, always run this sequence: + +``` +cargo fmt --all && cargo clippy --all-targets --all-features -- -D warnings && cargo test --quiet +``` + +## Code Style & Conventions + +- Use `thiserror` for errors. +- Prefer `Result` over `.unwrap()` / `.expect()` - handle errors explicitly +- Use `impl Trait` in argument position for simple generic bounds +- Prefer iterators and combinators over manual loops where readable +- Destructure structs at use sites when accessing multiple fields +- Use `#[must_use]` on functions whose return values should not be ignored +- You are NOT ALLOWED to add useless code separators like this: + + ```rust + // --------------------------------------------------------------------------- + // Some section + // --------------------------------------------------------------------------- + ``` + + These are considered bad practice and indicate that the code is not + well-structured. Prefer using functions and modules to organize your code. + + If you feel the need to add such separators, it likely means that your code + is too long and should be refactored into smaller, more manageable pieces. + +## Module Organization + +- One public type per file when the type is complex +- Re-export public API from `lib.rs` / `mod.rs` +- Keep `mod` declarations in parent, not via `mod.rs` in subdirectories (2018 edition style) +- Group imports: std → external crates → crate-internal (`use crate::...`) + +## Async / Concurrency + +- Runtime: tokio (multi-threaded) +- Prefer `tokio::spawn` for concurrent tasks; use `JoinSet` for structured concurrency +- Use `tokio::select!` for racing futures; always include cancellation safety notes +- Channels: `tokio::sync::mpsc` for multi-producer, `tokio::sync::oneshot` for request-response +- Never block the async runtime — offload blocking work with `tokio::task::spawn_blocking` + +## Dependencies + +- Check for existing deps with `cargo tree` before adding new crates +- Pin major versions in `Cargo.toml` (e.g., `serde = "1"`) +- Minimize feature flags — only enable what you use +- Audit new deps: check download counts, maintenance status, and `cargo audit` diff --git a/Cargo.lock b/Cargo.lock index 82b64b1..2aeba04 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -513,6 +513,12 @@ version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + [[package]] name = "encode_unicode" version = "1.0.0" @@ -766,6 +772,9 @@ dependencies = [ "mesa-dev", "nix", "num-traits", + "opentelemetry", + "opentelemetry-otlp", + "opentelemetry_sdk", "rand", "reqwest", "reqwest-middleware", @@ -783,10 +792,17 @@ dependencies = [ "toml", "tracing", "tracing-indicatif", + "tracing-opentelemetry", "tracing-subscriber", "vergen-gitcl", ] +[[package]] +name = "glob" +version = "0.3.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" + [[package]] name = "h2" version = "0.4.13" @@ -1182,6 +1198,15 @@ version = "1.70.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = 
"a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" +[[package]] +name = "itertools" +version = "0.14.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] + [[package]] name = "itoa" version = "1.0.17" @@ -1282,9 +1307,9 @@ checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" [[package]] name = "mesa-dev" -version = "1.8.0" +version = "1.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fb5d4651070e6257276f86eb0737bfe37bd6a6a73f7de827fca4efaef55da091" +checksum = "0685415ca22ab4f72a6a3f0f720fcca0c69d295d2a95a40cd3ce92555103d3a1" dependencies = [ "async-stream", "futures-core", @@ -1294,6 +1319,7 @@ dependencies = [ "serde", "serde_json", "serde_path_to_error", + "tracing", ] [[package]] @@ -1464,6 +1490,84 @@ dependencies = [ "vcpkg", ] +[[package]] +name = "opentelemetry" +version = "0.29.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9e87237e2775f74896f9ad219d26a2081751187eb7c9f5c58dde20a23b95d16c" +dependencies = [ + "futures-core", + "futures-sink", + "js-sys", + "pin-project-lite", + "thiserror 2.0.18", + "tracing", +] + +[[package]] +name = "opentelemetry-http" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "46d7ab32b827b5b495bd90fa95a6cb65ccc293555dcc3199ae2937d2d237c8ed" +dependencies = [ + "async-trait", + "bytes", + "http", + "opentelemetry", + "reqwest", + "tracing", +] + +[[package]] +name = "opentelemetry-otlp" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d899720fe06916ccba71c01d04ecd77312734e2de3467fd30d9d580c8ce85656" +dependencies = [ + "futures-core", + "http", + "opentelemetry", + "opentelemetry-http", + "opentelemetry-proto", + "opentelemetry_sdk", + "prost", + "reqwest", + "thiserror 2.0.18", + "tracing", +] + +[[package]] +name = "opentelemetry-proto" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8c40da242381435e18570d5b9d50aca2a4f4f4d8e146231adb4e7768023309b3" +dependencies = [ + "opentelemetry", + "opentelemetry_sdk", + "prost", + "tonic", +] + +[[package]] +name = "opentelemetry_sdk" +version = "0.29.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "afdefb21d1d47394abc1ba6c57363ab141be19e27cc70d0e422b7f303e4d290b" +dependencies = [ + "futures-channel", + "futures-executor", + "futures-util", + "glob", + "opentelemetry", + "percent-encoding", + "rand", + "serde_json", + "thiserror 2.0.18", + "tokio", + "tokio-stream", + "tracing", +] + [[package]] name = "option-ext" version = "0.2.0" @@ -1509,6 +1613,26 @@ version = "2.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" +[[package]] +name = "pin-project" +version = "1.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "677f1add503faace112b9f1373e43e9e054bfdd22ff1a63c1bc485eaec6a6a8a" +dependencies = [ + "pin-project-internal", +] + +[[package]] +name = "pin-project-internal" +version = "1.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6e918e4ff8c4549eb882f14b3a4bc8c8bc93de829416eacf579f1207a8fbf861" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "pin-project-lite" version = "0.2.16" @@ -1576,6 +1700,29 
@@ dependencies = [ "unicode-ident", ] +[[package]] +name = "prost" +version = "0.13.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" +dependencies = [ + "bytes", + "prost-derive", +] + +[[package]] +name = "prost-derive" +version = "0.13.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" +dependencies = [ + "anyhow", + "itertools", + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "quick-xml" version = "0.37.5" @@ -2466,6 +2613,17 @@ dependencies = [ "tokio", ] +[[package]] +name = "tokio-stream" +version = "0.1.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32da49809aab5c3bc678af03902d4ccddea2a87d028d86392a4b1560c6906c70" +dependencies = [ + "futures-core", + "pin-project-lite", + "tokio", +] + [[package]] name = "tokio-util" version = "0.7.18" @@ -2518,6 +2676,27 @@ version = "1.0.6+spec-1.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ab16f14aed21ee8bfd8ec22513f7287cd4a91aa92e44edfe2c17ddd004e92607" +[[package]] +name = "tonic" +version = "0.12.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "877c5b330756d856ffcc4553ab34a5684481ade925ecc54bcd1bf02b1d0d4d52" +dependencies = [ + "async-trait", + "base64", + "bytes", + "http", + "http-body", + "http-body-util", + "percent-encoding", + "pin-project", + "prost", + "tokio-stream", + "tower-layer", + "tower-service", + "tracing", +] + [[package]] name = "tower" version = "0.5.3" @@ -2618,6 +2797,24 @@ dependencies = [ "tracing-core", ] +[[package]] +name = "tracing-opentelemetry" +version = "0.30.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fd8e764bd6f5813fd8bebc3117875190c5b0415be8f7f8059bffb6ecd979c444" +dependencies = [ + "js-sys", + "once_cell", + "opentelemetry", + "opentelemetry_sdk", + "smallvec", + "tracing", + "tracing-core", + "tracing-log", + "tracing-subscriber", + "web-time", +] + [[package]] name = "tracing-subscriber" version = "0.3.22" diff --git a/Cargo.toml b/Cargo.toml index 3c0f8cf..25329d0 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -12,7 +12,7 @@ clap = { version = "4.5.54", features = ["derive", "env"] } tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt"] } fuser = { version = "0.16.0", features = ["libfuse"] } libc = "0.2" -mesa-dev = "1.8.0" +mesa-dev = "1.11.0" num-traits = "0.2" reqwest = { version = "0.12", default-features = false } reqwest-middleware = "0.4" @@ -42,10 +42,15 @@ semver = "1.0" shellexpand = "3.1" inquire = "0.9.2" tracing-indicatif = "0.3.14" +opentelemetry = { version = "0.29", optional = true } +opentelemetry_sdk = { version = "0.29", features = ["rt-tokio"], optional = true } +opentelemetry-otlp = { version = "0.29", features = ["http-proto", "trace", "reqwest-client"], optional = true } +tracing-opentelemetry = { version = "0.30", optional = true } [features] default = [] staging = [] +__otlp_export = ["opentelemetry", "opentelemetry_sdk", "opentelemetry-otlp", "tracing-opentelemetry"] [build-dependencies] vergen-gitcl = { version = "1", features = [] } diff --git a/src/fs/fuser.rs b/src/fs/fuser.rs index 7fae3df..86ddabb 100644 --- a/src/fs/fuser.rs +++ b/src/fs/fuser.rs @@ -154,7 +154,7 @@ where F::ReaddirError: Into, F::ReleaseError: Into, { - #[instrument(skip(self, _req, reply))] + #[instrument(name = "FuserAdapter::lookup", 
skip(self, _req, reply))] fn lookup( &mut self, _req: &fuser::Request<'_>, @@ -178,7 +178,7 @@ where } } - #[instrument(skip(self, _req, fh, reply))] + #[instrument(name = "FuserAdapter::getattr", skip(self, _req, fh, reply))] fn getattr( &mut self, _req: &fuser::Request<'_>, @@ -198,7 +198,7 @@ where } } - #[instrument(skip(self, _req, _fh, offset, reply))] + #[instrument(name = "FuserAdapter::readdir", skip(self, _req, _fh, offset, reply))] fn readdir( &mut self, _req: &fuser::Request<'_>, @@ -243,7 +243,7 @@ where reply.ok(); } - #[instrument(skip(self, _req, flags, reply))] + #[instrument(name = "FuserAdapter::open", skip(self, _req, flags, reply))] fn open(&mut self, _req: &fuser::Request<'_>, ino: u64, flags: i32, reply: fuser::ReplyOpen) { match self.runtime.block_on(self.fs.open(ino, flags.into())) { Ok(open_file) => { @@ -257,7 +257,10 @@ where } } - #[instrument(skip(self, _req, fh, offset, size, flags, lock_owner, reply))] + #[instrument( + name = "FuserAdapter::read", + skip(self, _req, fh, offset, size, flags, lock_owner, reply) + )] fn read( &mut self, _req: &fuser::Request<'_>, @@ -290,7 +293,7 @@ where } } - #[instrument(skip(self, _req, _lock_owner, reply))] + #[instrument(name = "FuserAdapter::release", skip(self, _req, _lock_owner, reply))] fn release( &mut self, _req: &fuser::Request<'_>, @@ -316,12 +319,12 @@ where } } - #[instrument(skip(self, _req, nlookup))] + #[instrument(name = "FuserAdapter::forget", skip(self, _req, nlookup))] fn forget(&mut self, _req: &fuser::Request<'_>, ino: u64, nlookup: u64) { self.runtime.block_on(self.fs.forget(ino, nlookup)); } - #[instrument(skip(self, _req, _ino, reply))] + #[instrument(name = "FuserAdapter::statfs", skip(self, _req, _ino, reply))] fn statfs(&mut self, _req: &fuser::Request<'_>, _ino: u64, reply: fuser::ReplyStatfs) { self.runtime.block_on(async { match self.fs.statfs().await { diff --git a/src/fs/icache/async_cache.rs b/src/fs/icache/async_cache.rs new file mode 100644 index 0000000..84003da --- /dev/null +++ b/src/fs/icache/async_cache.rs @@ -0,0 +1,1410 @@ +//! Async inode cache with InFlight/Available state machine. + +use std::future::Future; + +use scc::HashMap as ConcurrentHashMap; +use tokio::sync::watch; + +use tracing::{instrument, trace, warn}; + +use crate::fs::r#trait::Inode; + +use super::IcbLike; + +/// State of an entry in the async inode cache. +pub enum IcbState { + /// Entry is being loaded; waiters clone the receiver and `.changed().await`. + /// + /// The channel carries `()` rather than the resolved value because the map + /// is the single source of truth: ICBs are mutated in-place (rc, attrs) so + /// a snapshot in the channel would immediately go stale. Sender-drop also + /// gives us implicit, leak-proof signalling on both success and error paths. + InFlight(watch::Receiver<()>), + /// Entry is ready for use. + Available(I), +} + +impl IcbState { + /// Consume `self`, returning the inner value if `Available`, or `None` if `InFlight`. + fn into_available(self) -> Option { + match self { + Self::Available(inner) => Some(inner), + Self::InFlight(_) => None, + } + } +} + +/// Trait for resolving an inode to its control block. +/// +/// Implementations act as a "promise" that an ICB will eventually be produced +/// for a given inode. The cache calls `resolve` when it needs to populate a +/// missing entry. +pub trait IcbResolver: Send + Sync { + /// The inode control block type this resolver produces. + type Icb: IcbLike + Send + Sync; + /// Error type returned when resolution fails. 
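+ ///
+ /// (The unit tests below use `String` here; a production resolver would
+ /// typically use a domain error type instead.)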
+ type Error: Send; + + /// Resolve an inode to a fully-populated control block. + /// + /// - `stub`: `Some(icb)` if upgrading an existing stub entry, `None` if creating + /// from scratch. The stub typically has `parent` and `path` set but `attr` missing. + /// - `cache`: reference to the cache, useful for walking parent chains to build paths. + fn resolve( + &self, + ino: Inode, + stub: Option, + cache: &AsyncICache, + ) -> impl Future> + Send + where + Self: Sized; +} + +/// Async, concurrency-safe inode cache. +/// +/// All methods take `&self` — internal synchronization is provided by +/// `scc::HashMap` (sharded lock-free map). +pub struct AsyncICache { + resolver: R, + inode_table: ConcurrentHashMap>, +} + +impl AsyncICache { + /// Create a new cache with a root ICB at `root_ino` (rc = 1). + pub fn new(resolver: R, root_ino: Inode, root_path: impl Into) -> Self { + let table = ConcurrentHashMap::new(); + // insert_sync is infallible for a fresh map + drop(table.insert_sync( + root_ino, + IcbState::Available(R::Icb::new_root(root_path.into())), + )); + Self { + resolver, + inode_table: table, + } + } + + /// Number of entries (`InFlight` + `Available`) in the table. + pub fn inode_count(&self) -> usize { + self.inode_table.len() + } + + /// Wait until `ino` is `Available`. + /// Returns `true` if the entry exists and is Available, + /// `false` if the entry does not exist. + #[instrument(name = "AsyncICache::wait_for_available", skip(self))] + async fn wait_for_available(&self, ino: Inode) -> bool { + loop { + let rx = self + .inode_table + .read_async(&ino, |_, s| match s { + IcbState::InFlight(rx) => Some(rx.clone()), + IcbState::Available(_) => None, + }) + .await; + + match rx { + None => return false, // key missing + Some(None) => return true, // Available + Some(Some(mut rx)) => { + // Wait for the resolver to complete (or fail/drop sender). + // changed() returns Err(RecvError) when sender is dropped, + // which is fine — it means resolution finished. + let _ = rx.changed().await; + // Loop back — the entry might be InFlight again if another + // resolution cycle started between our wakeup and re-read. + } + } + } + } + + /// Check whether `ino` has an entry in the table (either `InFlight` or `Available`). + /// + /// This is a non-blocking, synchronous check. It does **not** wait for + /// `InFlight` entries to resolve. + pub fn contains(&self, ino: Inode) -> bool { + self.inode_table.contains_sync(&ino) + } + + /// Read an ICB via closure. **Awaits** if `InFlight`. + /// Returns `None` if `ino` doesn't exist. + #[instrument(name = "AsyncICache::get_icb", skip(self, f))] + // `Sync` is required because `f` is held across `.await` points in the + // loop body; for the resulting future to be `Send`, the captured closure + // must be `Sync` (clippy::future_not_send). + pub async fn get_icb( + &self, + ino: Inode, + f: impl Fn(&R::Icb) -> T + Send + Sync, + ) -> Option { + loop { + if !self.wait_for_available(ino).await { + return None; + } + let result = self + .inode_table + .read_async(&ino, |_, state| match state { + IcbState::Available(icb) => Some(f(icb)), + IcbState::InFlight(_) => None, + }) + .await; + match result { + Some(Some(val)) => return Some(val), + Some(None) => {} // was InFlight, retry + None => return None, // key missing + } + } + } + + /// Mutate an ICB via closure. **Awaits** if `InFlight`. + /// Returns `None` if `ino` doesn't exist. 
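+ ///
+ /// Minimal usage sketch (hypothetical caller; `cache` and `ino` are assumed
+ /// to be in scope and are not part of this change):
+ ///
+ /// ```ignore
+ /// // Bump the reference count through the closure-based mutator,
+ /// // similar to what `inc_rc` does internally.
+ /// let new_rc: Option<u64> = cache
+ ///     .get_icb_mut(ino, |icb| {
+ ///         *icb.rc_mut() += 1;
+ ///         icb.rc()
+ ///     })
+ ///     .await;
+ /// ```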
+ #[instrument(name = "AsyncICache::get_icb_mut", skip(self, f))] + pub async fn get_icb_mut( + &self, + ino: Inode, + mut f: impl FnMut(&mut R::Icb) -> T + Send, + ) -> Option { + loop { + if !self.wait_for_available(ino).await { + return None; + } + let result = self + .inode_table + .update_async(&ino, |_, state| match state { + IcbState::Available(icb) => Some(f(icb)), + IcbState::InFlight(_) => None, + }) + .await; + match result { + Some(Some(val)) => return Some(val), + Some(None) => {} // was InFlight, retry + None => return None, // key missing + } + } + } + + /// Insert an ICB directly as `Available`. If the entry is currently + /// `InFlight`, waits for resolution before overwriting. + #[instrument(name = "AsyncICache::insert_icb", skip(self, icb))] + pub async fn insert_icb(&self, ino: Inode, icb: R::Icb) { + use scc::hash_map::Entry; + let mut icb = Some(icb); + loop { + match self.inode_table.entry_async(ino).await { + Entry::Vacant(vac) => { + let val = icb + .take() + .unwrap_or_else(|| unreachable!("icb consumed more than once")); + vac.insert_entry(IcbState::Available(val)); + return; + } + Entry::Occupied(mut occ) => match occ.get_mut() { + IcbState::InFlight(rx) => { + let mut rx = rx.clone(); + drop(occ); + let _ = rx.changed().await; + } + IcbState::Available(_) => { + let val = icb + .take() + .unwrap_or_else(|| unreachable!("icb consumed more than once")); + *occ.get_mut() = IcbState::Available(val); + return; + } + }, + } + } + } + + /// Get-or-insert pattern. If `ino` exists (awaits `InFlight`), runs `then` + /// on it. If absent, calls `factory` to create, inserts, then runs `then`. + /// + /// Both `factory` and `then` are `FnOnce` — wrapped in `Option` internally + /// to satisfy the borrow checker across the await-loop. + #[instrument(name = "AsyncICache::entry_or_insert_icb", skip(self, factory, then))] + pub async fn entry_or_insert_icb( + &self, + ino: Inode, + factory: impl FnOnce() -> R::Icb, + then: impl FnOnce(&mut R::Icb) -> T, + ) -> T { + use scc::hash_map::Entry; + let mut factory = Some(factory); + let mut then_fn = Some(then); + + loop { + match self.inode_table.entry_async(ino).await { + Entry::Occupied(mut occ) => match occ.get_mut() { + IcbState::Available(icb) => { + let t = then_fn + .take() + .unwrap_or_else(|| unreachable!("then_fn consumed more than once")); + return t(icb); + } + IcbState::InFlight(rx) => { + let mut rx = rx.clone(); + drop(occ); // release shard lock before awaiting + let _ = rx.changed().await; + } + }, + Entry::Vacant(vac) => { + let f = factory + .take() + .unwrap_or_else(|| unreachable!("factory consumed more than once")); + let t = then_fn + .take() + .unwrap_or_else(|| unreachable!("then_fn consumed more than once")); + let mut icb = f(); + let result = t(&mut icb); + vac.insert_entry(IcbState::Available(icb)); + return result; + } + } + } + } + + /// Write an ICB back to the table only if the entry still exists. + /// + /// If the entry was evicted (vacant) during resolution, the result is + /// silently dropped — this prevents resurrecting entries that a concurrent + /// `forget` has already removed. + async fn write_back_if_present(&self, ino: Inode, icb: R::Icb) { + use scc::hash_map::Entry; + match self.inode_table.entry_async(ino).await { + Entry::Occupied(mut occ) => { + *occ.get_mut() = IcbState::Available(icb); + } + Entry::Vacant(_) => { + tracing::debug!( + ino, + "resolved inode was evicted during resolution, dropping result" + ); + } + } + } + + /// Look up `ino`. 
If `Available` and fully resolved, run `then` and return + /// `Ok(T)`. If `Available` but `needs_resolve()` is true (stub), extract + /// the stub, resolve it, cache the result, then run `then`. If absent, call + /// the resolver to fetch the ICB, cache it, then run `then`. If another task + /// is already resolving this inode (`InFlight`), wait for it. + /// + /// Returns `Err(R::Error)` if resolution fails. On error the `InFlight` + /// entry is removed so subsequent calls can retry. + #[instrument(name = "AsyncICache::get_or_resolve", skip(self, then))] + pub async fn get_or_resolve( + &self, + ino: Inode, + then: impl FnOnce(&R::Icb) -> T, + ) -> Result { + use scc::hash_map::Entry; + + let mut then_fn = Some(then); + + // Fast path: Available and fully resolved + { + let hit = self + .inode_table + .read_async(&ino, |_, s| match s { + IcbState::Available(icb) if !icb.needs_resolve() => { + let t = then_fn + .take() + .unwrap_or_else(|| unreachable!("then_fn consumed more than once")); + Some(t(icb)) + } + IcbState::InFlight(_) | IcbState::Available(_) => None, + }) + .await; + if let Some(Some(r)) = hit { + return Ok(r); + } + } + + // Slow path: missing, InFlight, or stub needing resolution + loop { + match self.inode_table.entry_async(ino).await { + Entry::Occupied(mut occ) => match occ.get_mut() { + IcbState::Available(icb) if !icb.needs_resolve() => { + let t = then_fn + .take() + .unwrap_or_else(|| unreachable!("then_fn consumed more than once")); + return Ok(t(icb)); + } + IcbState::Available(_) => { + // Stub needing resolution — extract stub, replace with InFlight + let (tx, rx) = watch::channel(()); + let old = std::mem::replace(occ.get_mut(), IcbState::InFlight(rx)); + let stub = old.into_available().unwrap_or_else(|| { + unreachable!("matched Available arm, replaced value must be Available") + }); + let fallback = stub.clone(); + drop(occ); // release shard lock before awaiting + + match self.resolver.resolve(ino, Some(stub), self).await { + Ok(icb) => { + let t = then_fn.take().unwrap_or_else(|| { + unreachable!("then_fn consumed more than once") + }); + let result = t(&icb); + self.write_back_if_present(ino, icb).await; + drop(tx); + return Ok(result); + } + Err(e) => { + if fallback.rc() > 0 { + self.write_back_if_present(ino, fallback).await; + } else { + self.inode_table.remove_async(&ino).await; + } + drop(tx); + return Err(e); + } + } + } + IcbState::InFlight(rx) => { + let mut rx = rx.clone(); + drop(occ); + let _ = rx.changed().await; + } + }, + Entry::Vacant(vac) => { + let (tx, rx) = watch::channel(()); + vac.insert_entry(IcbState::InFlight(rx)); + + match self.resolver.resolve(ino, None, self).await { + Ok(icb) => { + let t = then_fn + .take() + .unwrap_or_else(|| unreachable!("then_fn consumed more than once")); + let result = t(&icb); + self.write_back_if_present(ino, icb).await; + drop(tx); + return Ok(result); + } + Err(e) => { + self.inode_table.remove_async(&ino).await; + drop(tx); + return Err(e); + } + } + } + } + } + } + + /// Increment rc. **Awaits** `InFlight`. + /// + /// Returns `None` if the inode does not exist or was evicted concurrently. + /// This can happen when a concurrent `forget` removes the entry between the + /// caller's insert/cache and this `inc_rc` call, or when a concurrent + /// `get_or_resolve` swaps the entry to `InFlight` and the entry is then + /// evicted on resolution failure. 
Callers in FUSE `lookup` paths should + /// treat `None` as a lookup failure to avoid ref-count leaks (the kernel + /// would hold a reference the cache no longer tracks). + #[instrument(name = "AsyncICache::inc_rc", skip(self))] + pub async fn inc_rc(&self, ino: Inode) -> Option { + loop { + if !self.wait_for_available(ino).await { + warn!(ino, "inc_rc: inode not in table"); + return None; + } + let result = self + .inode_table + .update_async(&ino, |_, state| match state { + IcbState::Available(icb) => { + *icb.rc_mut() += 1; + Some(icb.rc()) + } + IcbState::InFlight(_) => None, + }) + .await + .flatten(); + + match result { + Some(rc) => return Some(rc), + None => { + // Entry was concurrently replaced with InFlight or evicted. + if !self.contains(ino) { + warn!(ino, "inc_rc: inode evicted concurrently"); + return None; + } + // Entry exists but became InFlight — retry. + } + } + } + } + + /// Decrement rc by `nlookups`. If rc drops to zero, evicts and returns + /// the ICB. **Awaits** `InFlight` entries. + #[instrument(name = "AsyncICache::forget", skip(self))] + pub async fn forget(&self, ino: Inode, nlookups: u64) -> Option { + use scc::hash_map::Entry; + + loop { + match self.inode_table.entry_async(ino).await { + Entry::Occupied(mut occ) => match occ.get_mut() { + IcbState::Available(icb) => { + if icb.rc() <= nlookups { + trace!(ino, "evicting inode"); + let (_, state) = occ.remove_entry(); + return state.into_available(); + } + *icb.rc_mut() -= nlookups; + trace!(ino, new_rc = icb.rc(), "decremented rc"); + return None; + } + IcbState::InFlight(rx) => { + let mut rx = rx.clone(); + drop(occ); + let _ = rx.changed().await; + } + }, + Entry::Vacant(_) => { + warn!(ino, "forget on unknown inode"); + return None; + } + } + } + } + + /// Synchronous mutable access to an `Available` entry. + /// Does **not** wait for `InFlight`. Intended for initialization. + pub fn get_icb_mut_sync(&self, ino: Inode, f: impl FnOnce(&mut R::Icb) -> T) -> Option { + self.inode_table + .update_sync(&ino, |_, state| match state { + IcbState::Available(icb) => Some(f(icb)), + IcbState::InFlight(_) => None, + }) + .flatten() + } + + /// Iterate over all `Available` entries (skips `InFlight`). + /// Async-safe iteration using `iter_async` to avoid contention on single-threaded runtimes. 
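+ ///
+ /// Usage sketch (hypothetical caller; `cache` is assumed to be in scope):
+ ///
+ /// ```ignore
+ /// // Count entries that are still referenced (rc > 0).
+ /// let mut live = 0;
+ /// cache.for_each(|_ino, icb| {
+ ///     if icb.rc() > 0 {
+ ///         live += 1;
+ ///     }
+ /// }).await;
+ /// ```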
+ pub async fn for_each(&self, mut f: impl FnMut(&Inode, &R::Icb)) { + self.inode_table + .iter_async(|ino, state| { + if let IcbState::Available(icb) = state { + f(ino, icb); + } + true // continue iteration + }) + .await; + } +} + +#[cfg(test)] +mod tests { + use super::*; + use std::collections::HashMap as StdHashMap; + use std::path::PathBuf; + use std::sync::atomic::Ordering; + use std::sync::{Arc, Mutex}; + + #[derive(Debug, Clone, PartialEq)] + struct TestIcb { + rc: u64, + path: PathBuf, + resolved: bool, + } + + impl IcbLike for TestIcb { + fn new_root(path: PathBuf) -> Self { + Self { + rc: 1, + path, + resolved: true, + } + } + fn rc(&self) -> u64 { + self.rc + } + fn rc_mut(&mut self) -> &mut u64 { + &mut self.rc + } + fn needs_resolve(&self) -> bool { + !self.resolved + } + } + + struct TestResolver { + responses: Mutex>>, + } + + impl TestResolver { + fn new() -> Self { + Self { + responses: Mutex::new(StdHashMap::new()), + } + } + + fn add(&self, ino: Inode, icb: TestIcb) { + self.responses + .lock() + .expect("test mutex") + .insert(ino, Ok(icb)); + } + + fn add_err(&self, ino: Inode, err: impl Into) { + self.responses + .lock() + .expect("test mutex") + .insert(ino, Err(err.into())); + } + } + + impl IcbResolver for TestResolver { + type Icb = TestIcb; + type Error = String; + + fn resolve( + &self, + ino: Inode, + _stub: Option, + _cache: &AsyncICache, + ) -> impl Future> + Send { + let result = self + .responses + .lock() + .expect("test mutex") + .remove(&ino) + .unwrap_or_else(|| Err(format!("no response for inode {ino}"))); + async move { result } + } + } + + fn test_cache() -> AsyncICache { + AsyncICache::new(TestResolver::new(), 1, "/root") + } + + fn test_cache_with(resolver: TestResolver) -> AsyncICache { + AsyncICache::new(resolver, 1, "/root") + } + + #[tokio::test] + async fn contains_returns_true_for_root() { + let cache = test_cache(); + assert!(cache.contains(1), "root should exist"); + } + + #[tokio::test] + async fn contains_returns_false_for_missing() { + let cache = test_cache(); + assert!(!cache.contains(999), "missing inode should not exist"); + } + + #[tokio::test] + async fn contains_after_resolver_completes() { + let resolver = TestResolver::new(); + resolver.add( + 42, + TestIcb { + rc: 1, + path: "/test".into(), + resolved: true, + }, + ); + let cache = Arc::new(test_cache_with(resolver)); + + // Trigger resolve in background + let cache2 = Arc::clone(&cache); + let handle = tokio::spawn(async move { cache2.get_or_resolve(42, |_| ()).await }); + + handle + .await + .expect("task panicked") + .expect("resolve failed"); + assert!(cache.contains(42), "should be true after resolve"); + } + + #[tokio::test] + async fn new_creates_root_entry() { + let cache = test_cache(); + assert_eq!(cache.inode_count(), 1, "should have exactly 1 entry"); + } + + #[tokio::test] + async fn get_icb_returns_value() { + let cache = test_cache(); + let path = cache.get_icb(1, |icb| icb.path.clone()).await; + assert_eq!(path, Some(PathBuf::from("/root"))); + } + + #[tokio::test] + async fn get_icb_returns_none_for_missing() { + let cache = test_cache(); + let result = cache.get_icb(999, IcbLike::rc).await; + assert_eq!(result, None, "missing inode should return None"); + } + + #[tokio::test] + async fn get_icb_mut_modifies_value() { + let cache = test_cache(); + cache + .get_icb_mut(1, |icb| { + *icb.rc_mut() += 10; + }) + .await; + let rc = cache.get_icb(1, IcbLike::rc).await; + assert_eq!(rc, Some(11), "root starts at rc=1, +10 = 11"); + } + + #[tokio::test] + async fn 
get_icb_after_resolver_completes() { + let resolver = TestResolver::new(); + resolver.add( + 42, + TestIcb { + rc: 1, + path: "/loaded".into(), + resolved: true, + }, + ); + let cache = test_cache_with(resolver); + + // Resolve inode 42 + cache + .get_or_resolve(42, |_| ()) + .await + .expect("resolve failed"); + + let path = cache.get_icb(42, |icb| icb.path.clone()).await; + assert_eq!(path, Some(PathBuf::from("/loaded"))); + } + + #[tokio::test] + async fn insert_icb_adds_entry() { + let cache = test_cache(); + cache + .insert_icb( + 42, + TestIcb { + rc: 1, + path: "/foo".into(), + resolved: true, + }, + ) + .await; + assert!(cache.contains(42), "inserted entry should exist"); + assert_eq!(cache.inode_count(), 2, "root + inserted = 2"); + } + + #[tokio::test] + async fn insert_icb_does_not_clobber_inflight() { + let cache = Arc::new(test_cache()); + let (tx, rx) = watch::channel(()); + cache + .inode_table + .upsert_async(42, IcbState::InFlight(rx)) + .await; + + // Spawn insert_icb in background — should wait for InFlight to resolve + let cache2 = Arc::clone(&cache); + let handle = tokio::spawn(async move { + cache2 + .insert_icb( + 42, + TestIcb { + rc: 5, + path: "/inserted".into(), + resolved: true, + }, + ) + .await; + }); + + // Give insert_icb time to start waiting + tokio::task::yield_now().await; + + // Complete the InFlight from the resolver side (write directly) + cache + .inode_table + .upsert_async( + 42, + IcbState::Available(TestIcb { + rc: 1, + path: "/resolved".into(), + resolved: true, + }), + ) + .await; + drop(tx); // signal watchers + + handle.await.expect("task panicked"); + + // After insert_icb completes, it should have overwritten the resolved value + let path = cache.get_icb(42, |icb| icb.path.clone()).await; + assert_eq!(path, Some(PathBuf::from("/inserted"))); + } + + #[tokio::test] + async fn entry_or_insert_creates_new() { + let cache = test_cache(); + let rc = cache + .entry_or_insert_icb( + 42, + || TestIcb { + rc: 0, + path: "/new".into(), + resolved: true, + }, + |icb| { + *icb.rc_mut() += 1; + icb.rc() + }, + ) + .await; + assert_eq!(rc, 1, "factory creates rc=0, then +1 = 1"); + } + + #[tokio::test] + async fn entry_or_insert_returns_existing() { + let cache = test_cache(); + cache + .insert_icb( + 42, + TestIcb { + rc: 5, + path: "/existing".into(), + resolved: true, + }, + ) + .await; + + let rc = cache + .entry_or_insert_icb( + 42, + || panic!("factory should not be called"), + |icb| icb.rc(), + ) + .await; + assert_eq!(rc, 5, "existing entry rc should be 5"); + } + + #[tokio::test] + async fn entry_or_insert_after_resolver_completes() { + let resolver = TestResolver::new(); + resolver.add( + 42, + TestIcb { + rc: 1, + path: "/resolved".into(), + resolved: true, + }, + ); + let cache = Arc::new(test_cache_with(resolver)); + + // Start resolve in background + let cache2 = Arc::clone(&cache); + let resolve_handle = tokio::spawn(async move { cache2.get_or_resolve(42, |_| ()).await }); + + // Wait for resolve to finish + resolve_handle + .await + .expect("task panicked") + .expect("resolve failed"); + + // Now entry_or_insert should find the existing entry + let rc = cache + .entry_or_insert_icb( + 42, + || panic!("factory should not be called"), + |icb| icb.rc(), + ) + .await; + assert_eq!(rc, 1, "should find the resolved entry"); + } + + #[tokio::test] + async fn inc_rc_increments() { + let cache = test_cache(); + cache + .insert_icb( + 42, + TestIcb { + rc: 1, + path: "/a".into(), + resolved: true, + }, + ) + .await; + let new_rc = 
cache.inc_rc(42).await; + assert_eq!(new_rc, Some(2), "rc 1 + 1 = 2"); + } + + #[tokio::test] + async fn forget_decrements_rc() { + let cache = test_cache(); + cache + .insert_icb( + 42, + TestIcb { + rc: 5, + path: "/a".into(), + resolved: true, + }, + ) + .await; + + let evicted = cache.forget(42, 2).await; + assert!(evicted.is_none(), "rc 5 - 2 = 3, should not evict"); + + let rc = cache.get_icb(42, IcbLike::rc).await; + assert_eq!(rc, Some(3), "rc should be 3 after forget(2)"); + } + + #[tokio::test] + async fn forget_evicts_when_rc_drops_to_zero() { + let cache = test_cache(); + cache + .insert_icb( + 42, + TestIcb { + rc: 3, + path: "/a".into(), + resolved: true, + }, + ) + .await; + + let evicted = cache.forget(42, 3).await; + assert!(evicted.is_some(), "rc 3 - 3 = 0, should evict"); + assert!(!cache.contains(42), "evicted entry should be gone"); + assert_eq!(cache.inode_count(), 1, "only root remains"); + } + + #[tokio::test] + async fn forget_unknown_inode_returns_none() { + let cache = test_cache(); + let evicted = cache.forget(999, 1).await; + assert!(evicted.is_none(), "unknown inode should return None"); + } + + #[tokio::test] + async fn for_each_iterates_available_entries() { + let cache = test_cache(); + cache + .insert_icb( + 2, + TestIcb { + rc: 1, + path: "/a".into(), + resolved: true, + }, + ) + .await; + cache + .insert_icb( + 3, + TestIcb { + rc: 1, + path: "/b".into(), + resolved: true, + }, + ) + .await; + + let mut seen = std::collections::HashSet::new(); + cache + .for_each(|ino, _icb| { + seen.insert(*ino); + }) + .await; + assert_eq!(seen.len(), 3, "should see all 3 entries"); + assert!(seen.contains(&1), "should contain root"); + assert!(seen.contains(&2), "should contain inode 2"); + assert!(seen.contains(&3), "should contain inode 3"); + } + + #[tokio::test] + async fn for_each_skips_inflight() { + let cache = test_cache(); + // Directly insert an InFlight entry for testing iteration + let (_tx, rx) = watch::channel(()); + cache + .inode_table + .upsert_async(42, IcbState::InFlight(rx)) + .await; + + let mut count = 0; + cache + .for_each(|_, _| { + count += 1; + }) + .await; + assert_eq!(count, 1, "only root, not the InFlight entry"); + } + + #[tokio::test] + async fn wait_does_not_miss_signal_on_immediate_complete() { + let cache = Arc::new(test_cache()); + + // Insert InFlight manually, then immediately complete before anyone waits + let (tx, rx) = watch::channel(()); + cache + .inode_table + .upsert_async(42, IcbState::InFlight(rx)) + .await; + + // Complete before any waiter (simulate resolver by writing directly) + cache + .inode_table + .upsert_async( + 42, + IcbState::Available(TestIcb { + rc: 1, + path: "/fast".into(), + resolved: true, + }), + ) + .await; + drop(tx); + + assert!(cache.contains(42), "entry should exist in table"); + } + + // -- get_or_resolve tests -- + + #[tokio::test] + async fn get_or_resolve_returns_existing() { + let cache = test_cache(); + cache + .insert_icb( + 42, + TestIcb { + rc: 1, + path: "/existing".into(), + resolved: true, + }, + ) + .await; + + let path: Result = cache.get_or_resolve(42, |icb| icb.path.clone()).await; + assert_eq!(path, Ok(PathBuf::from("/existing"))); + } + + #[tokio::test] + async fn get_or_resolve_resolves_missing() { + let resolver = TestResolver::new(); + resolver.add( + 42, + TestIcb { + rc: 1, + path: "/resolved".into(), + resolved: true, + }, + ); + let cache = test_cache_with(resolver); + + let path: Result = cache.get_or_resolve(42, |icb| icb.path.clone()).await; + assert_eq!(path, 
Ok(PathBuf::from("/resolved"))); + // Should now be cached + assert!(cache.contains(42)); + } + + #[tokio::test] + async fn get_or_resolve_propagates_error() { + let resolver = TestResolver::new(); + resolver.add_err(42, "network error"); + let cache = test_cache_with(resolver); + + let result: Result = + cache.get_or_resolve(42, |icb| icb.path.clone()).await; + assert_eq!(result, Err("network error".to_owned())); + // Entry should be cleaned up on error + assert!(!cache.contains(42)); + } + + struct CountingResolver { + count: Arc, + } + + impl IcbResolver for CountingResolver { + type Icb = TestIcb; + type Error = String; + + fn resolve( + &self, + _ino: Inode, + _stub: Option, + _cache: &AsyncICache, + ) -> impl Future> + Send { + self.count.fetch_add(1, Ordering::SeqCst); + async { + tokio::task::yield_now().await; + Ok(TestIcb { + rc: 1, + path: "/coalesced".into(), + resolved: true, + }) + } + } + } + + #[tokio::test] + async fn get_or_resolve_coalesces_concurrent_requests() { + use std::sync::atomic::AtomicUsize; + + let resolve_count = Arc::new(AtomicUsize::new(0)); + + let cache = Arc::new(AsyncICache::new( + CountingResolver { + count: Arc::clone(&resolve_count), + }, + 1, + "/root", + )); + + let mut handles = Vec::new(); + for _ in 0..5 { + let c = Arc::clone(&cache); + handles.push(tokio::spawn(async move { + c.get_or_resolve(42, |icb| icb.path.clone()).await + })); + } + + for h in handles { + assert_eq!( + h.await.expect("task panicked"), + Ok(PathBuf::from("/coalesced")), + ); + } + + // Resolver should only have been called ONCE (not 5 times) + assert_eq!( + resolve_count.load(Ordering::SeqCst), + 1, + "should coalesce to 1 resolve call" + ); + } + + #[test] + fn icb_state_into_available_returns_inner() { + let state = IcbState::Available(TestIcb { + rc: 1, + path: "/test".into(), + resolved: true, + }); + assert!(state.into_available().is_some()); + } + + #[test] + fn icb_state_into_available_returns_none_for_inflight() { + let (_tx, rx) = watch::channel(()); + let state: IcbState = IcbState::InFlight(rx); + assert!(state.into_available().is_none()); + } + + #[tokio::test] + async fn get_or_resolve_resolves_stub_entry() { + let resolver = TestResolver::new(); + resolver.add( + 42, + TestIcb { + rc: 1, + path: "/resolved".into(), + resolved: true, + }, + ); + let cache = test_cache_with(resolver); + + // Insert unresolved stub + cache + .insert_icb( + 42, + TestIcb { + rc: 0, + path: "/stub".into(), + resolved: false, + }, + ) + .await; + + // get_or_resolve should trigger resolution because needs_resolve() == true + let path: Result = cache.get_or_resolve(42, |icb| icb.path.clone()).await; + assert_eq!(path, Ok(PathBuf::from("/resolved"))); + } + + #[tokio::test] + async fn forget_handles_inflight_entry() { + let cache = Arc::new(test_cache()); + let (tx, rx) = watch::channel(()); + cache + .inode_table + .upsert_async(42, IcbState::InFlight(rx)) + .await; + + let cache2 = Arc::clone(&cache); + let handle = tokio::spawn(async move { cache2.forget(42, 1).await }); + + // Give forget time to start waiting + tokio::task::yield_now().await; + + // Simulate resolver completing (write directly to inode_table) + cache + .inode_table + .upsert_async( + 42, + IcbState::Available(TestIcb { + rc: 3, + path: "/inflight".into(), + resolved: true, + }), + ) + .await; + drop(tx); + + let evicted = handle.await.expect("task panicked"); + assert!(evicted.is_none(), "rc=3 - 1 = 2, should not evict"); + + let rc = cache.get_icb(42, IcbLike::rc).await; + assert_eq!(rc, Some(2), "rc should be 
2 after forget(1) on rc=3"); + } + + #[tokio::test] + async fn get_or_resolve_error_preserves_stub_with_nonzero_rc() { + let resolver = TestResolver::new(); + resolver.add_err(42, "resolve failed"); + let cache = test_cache_with(resolver); + + // Insert a stub with rc=2 (simulates a looked-up entry needing resolution) + cache + .insert_icb( + 42, + TestIcb { + rc: 2, + path: "/stub".into(), + resolved: false, + }, + ) + .await; + + // get_or_resolve should fail + let result: Result = + cache.get_or_resolve(42, |icb| icb.path.clone()).await; + assert!(result.is_err(), "should propagate resolver error"); + + // The stub should be preserved since rc > 0 + assert!(cache.contains(42), "entry with rc=2 should survive error"); + let rc = cache.get_icb(42, IcbLike::rc).await; + assert_eq!(rc, Some(2), "rc should be preserved"); + } + + #[tokio::test] + async fn inc_rc_missing_inode_returns_none() { + let cache = test_cache(); + assert_eq!(cache.inc_rc(999).await, None); + } + + #[tokio::test] + async fn inc_rc_waits_for_inflight() { + let cache = Arc::new(test_cache()); + let (tx, rx) = watch::channel(()); + cache + .inode_table + .upsert_async(42, IcbState::InFlight(rx)) + .await; + + let cache2 = Arc::clone(&cache); + let handle = tokio::spawn(async move { cache2.inc_rc(42).await }); + + // Simulate resolver completing by writing directly to inode_table + cache + .inode_table + .upsert_async( + 42, + IcbState::Available(TestIcb { + rc: 1, + path: "/a".into(), + resolved: true, + }), + ) + .await; + drop(tx); + + let result = handle + .await + .unwrap_or_else(|e| panic!("task panicked: {e}")); + assert_eq!( + result, + Some(2), + "waited for Available, then incremented 1 -> 2" + ); + } + + #[tokio::test] + async fn inc_rc_returns_none_after_concurrent_eviction() { + let cache = Arc::new(test_cache()); + let (tx, rx) = watch::channel(()); + cache + .inode_table + .upsert_async(42, IcbState::InFlight(rx)) + .await; + + let cache2 = Arc::clone(&cache); + let handle = tokio::spawn(async move { cache2.inc_rc(42).await }); + + // Evict instead of completing + cache.inode_table.remove_async(&42).await; + drop(tx); + + let result = handle + .await + .unwrap_or_else(|e| panic!("task panicked: {e}")); + assert_eq!(result, None, "evicted entry should return None"); + } + + /// Resolver that pauses mid-resolution via a `Notify`, allowing the test + /// to interleave a `forget` while the resolve future is suspended. + struct SlowResolver { + /// Signalled by the resolver once it has started (so the test knows + /// resolution is in progress). + started: Arc, + /// The resolver waits on this before returning (the test signals it + /// after calling `forget`). + proceed: Arc, + } + + impl IcbResolver for SlowResolver { + type Icb = TestIcb; + type Error = String; + + fn resolve( + &self, + _ino: Inode, + _stub: Option, + _cache: &AsyncICache, + ) -> impl Future> + Send { + let started = Arc::clone(&self.started); + let proceed = Arc::clone(&self.proceed); + async move { + started.notify_one(); + proceed.notified().await; + Ok(TestIcb { + rc: 1, + path: "/slow-resolved".into(), + resolved: true, + }) + } + } + } + + /// Regression test: `get_icb` must survive the entry cycling back to + /// `InFlight` between when `wait_for_available` returns and when + /// `read_async` runs. The loop in `get_icb` should retry and eventually + /// return the final resolved value. 
+ #[tokio::test] + async fn wait_for_available_retries_on_re_inflight() { + let cache = Arc::new(test_cache()); + let ino: Inode = 42; + + // Phase 1: insert an InFlight entry. + let (tx1, rx1) = watch::channel(()); + cache + .inode_table + .upsert_async(ino, IcbState::InFlight(rx1)) + .await; + + // Spawn get_icb — it will wait for InFlight to resolve. + let cache_get = Arc::clone(&cache); + let get_handle = + tokio::spawn(async move { cache_get.get_icb(ino, |icb| icb.path.clone()).await }); + + // Give get_icb time to start waiting on the watch channel. + tokio::task::yield_now().await; + + // Phase 1 complete: transition to Available briefly, then immediately + // back to InFlight (simulates get_or_resolve finding a stub and + // re-entering InFlight for a second resolution). + let (tx2, rx2) = watch::channel(()); + cache + .inode_table + .upsert_async(ino, IcbState::InFlight(rx2)) + .await; + // Signal phase-1 watchers so get_icb wakes up; it will re-read the + // entry and find InFlight again, then loop back to wait. + drop(tx1); + + // Give get_icb time to re-enter the wait loop. + tokio::task::yield_now().await; + + // Phase 2 complete: write the final resolved value. + cache + .inode_table + .upsert_async( + ino, + IcbState::Available(TestIcb { + rc: 1, + path: "/fully-resolved".into(), + resolved: true, + }), + ) + .await; + drop(tx2); + + // get_icb should return the final resolved value (not None). + let result = get_handle.await.expect("get_icb task panicked"); + assert_eq!( + result, + Some(PathBuf::from("/fully-resolved")), + "get_icb must survive re-InFlight and return the final resolved value" + ); + } + + /// Regression test: an entry evicted by `forget` during an in-progress + /// `get_or_resolve` must NOT be resurrected when resolution completes. + #[tokio::test] + async fn get_or_resolve_does_not_resurrect_evicted_entry() { + let started = Arc::new(tokio::sync::Notify::new()); + let proceed = Arc::new(tokio::sync::Notify::new()); + + let cache = Arc::new(AsyncICache::new( + SlowResolver { + started: Arc::clone(&started), + proceed: Arc::clone(&proceed), + }, + 1, + "/root", + )); + + let ino: Inode = 42; + + // Insert a stub with rc=1 (simulates a looked-up, unresolved entry). + cache + .insert_icb( + ino, + TestIcb { + rc: 1, + path: "/stub".into(), + resolved: false, + }, + ) + .await; + + // Spawn get_or_resolve which will trigger slow resolution. + let cache2 = Arc::clone(&cache); + let resolve_handle = + tokio::spawn(async move { cache2.get_or_resolve(ino, |icb| icb.path.clone()).await }); + + // Wait until the resolver has started (entry is now InFlight). + started.notified().await; + + // Evict the entry while resolution is in progress. + // forget waits for InFlight, so we need to complete resolution for + // forget to proceed. Instead, remove the InFlight entry directly to + // simulate a concurrent eviction (e.g., by another path that already + // removed the entry). + cache.inode_table.remove_async(&ino).await; + + // Let the resolver finish. + proceed.notify_one(); + + // Wait for get_or_resolve to complete. + drop(resolve_handle.await.expect("task panicked")); + + // The entry must NOT have been resurrected by write_back_if_present. 
+ assert!( + !cache.contains(ino), + "evicted entry must not be resurrected after resolution completes" + ); + } +} diff --git a/src/fs/icache/bridge.rs b/src/fs/icache/bridge.rs index 4846549..e674a56 100644 --- a/src/fs/icache/bridge.rs +++ b/src/fs/icache/bridge.rs @@ -65,8 +65,17 @@ impl HashMapBridge { /// Rewrite the `ino` field in a [`FileAttr`] from right (inner) to left (outer) namespace. pub fn attr_backward(&self, attr: FileAttr) -> FileAttr { - let backward = - |ino: Inode| -> Inode { self.inode_map.get_by_right(&ino).copied().unwrap_or(ino) }; + let backward = |ino: Inode| -> Inode { + if let Some(&left) = self.inode_map.get_by_right(&ino) { + left + } else { + tracing::warn!( + inner_ino = ino, + "attr_backward: no bridge mapping, using raw inner inode" + ); + ino + } + }; rewrite_attr_ino(attr, backward) } diff --git a/src/fs/icache/cache.rs b/src/fs/icache/cache.rs deleted file mode 100644 index 675a3ba..0000000 --- a/src/fs/icache/cache.rs +++ /dev/null @@ -1,99 +0,0 @@ -//! Generic inode table with reference counting and file handle allocation. - -use std::collections::HashMap; - -use tracing::{trace, warn}; - -use crate::fs::r#trait::{FileHandle, Inode}; - -use super::IcbLike; - -/// Generic directory cache. -/// -/// Owns an inode table and a file handle counter. Provides reference counting, -/// ICB lookup/insertion, and file handle allocation. -pub struct ICache { - inode_table: HashMap, - next_fh: FileHandle, -} - -impl ICache { - /// Create a new `ICache` with a root ICB at `root_ino` (rc=1). - pub fn new(root_ino: Inode, root_path: impl Into) -> Self { - let mut inode_table = HashMap::new(); - inode_table.insert(root_ino, I::new_root(root_path.into())); - Self { - inode_table, - next_fh: 1, - } - } - - /// Allocate a file handle (increments `next_fh` and returns the old value). - pub fn allocate_fh(&mut self) -> FileHandle { - let fh = self.next_fh; - self.next_fh += 1; - fh - } - - pub fn get_icb(&self, ino: Inode) -> Option<&I> { - self.inode_table.get(&ino) - } - - pub fn get_icb_mut(&mut self, ino: Inode) -> Option<&mut I> { - self.inode_table.get_mut(&ino) - } - - pub fn contains(&self, ino: Inode) -> bool { - self.inode_table.contains_key(&ino) - } - - /// Insert an ICB directly. - pub fn insert_icb(&mut self, ino: Inode, icb: I) { - self.inode_table.insert(ino, icb); - } - - /// Insert an ICB only if absent. - /// Returns a mutable reference to the (possibly pre-existing) ICB. - pub fn entry_or_insert_icb(&mut self, ino: Inode, f: impl FnOnce() -> I) -> &mut I { - self.inode_table.entry(ino).or_insert_with(f) - } - - /// Number of inodes in the table. - pub fn inode_count(&self) -> usize { - self.inode_table.len() - } - - /// Increment rc. Panics (via unwrap) if inode doesn't exist. - pub fn inc_rc(&mut self, ino: Inode) -> u64 { - let icb = self - .inode_table - .get_mut(&ino) - .unwrap_or_else(|| unreachable!("inc_rc: inode {ino} not in table")); - *icb.rc_mut() += 1; - icb.rc() - } - - /// Decrement rc by `nlookups`. Returns `Some(evicted_icb)` if the inode was evicted. 
- pub fn forget(&mut self, ino: Inode, nlookups: u64) -> Option { - match self.inode_table.entry(ino) { - std::collections::hash_map::Entry::Occupied(mut entry) => { - if entry.get().rc() <= nlookups { - trace!(ino, "evicting inode"); - Some(entry.remove()) - } else { - *entry.get_mut().rc_mut() -= nlookups; - trace!(ino, new_rc = entry.get().rc(), "decremented rc"); - None - } - } - std::collections::hash_map::Entry::Vacant(_) => { - warn!(ino, "forget on unknown inode"); - None - } - } - } - - pub fn iter(&self) -> impl Iterator { - self.inode_table.iter() - } -} diff --git a/src/fs/icache/file_table.rs b/src/fs/icache/file_table.rs new file mode 100644 index 0000000..332a6ff --- /dev/null +++ b/src/fs/icache/file_table.rs @@ -0,0 +1,22 @@ +use std::sync::atomic::{AtomicU64, Ordering}; + +use crate::fs::r#trait::FileHandle; + +/// Monotonically increasing file handle allocator. +#[must_use] +pub struct FileTable { + next_fh: AtomicU64, +} + +impl FileTable { + pub fn new() -> Self { + Self { + next_fh: AtomicU64::new(1), + } + } + + #[must_use] + pub fn allocate(&self) -> FileHandle { + self.next_fh.fetch_add(1, Ordering::Relaxed) + } +} diff --git a/src/fs/icache/inode_factory.rs b/src/fs/icache/inode_factory.rs index 3f8f95e..1a60338 100644 --- a/src/fs/icache/inode_factory.rs +++ b/src/fs/icache/inode_factory.rs @@ -1,18 +1,19 @@ use crate::fs::r#trait::Inode; +use std::sync::atomic::{AtomicU64, Ordering}; /// Monotonically increasing inode allocator. pub struct InodeFactory { - next_inode: Inode, + next_inode: AtomicU64, } impl InodeFactory { pub fn new(start: Inode) -> Self { - Self { next_inode: start } + Self { + next_inode: AtomicU64::new(start), + } } - pub fn allocate(&mut self) -> Inode { - let ino = self.next_inode; - self.next_inode += 1; - ino + pub fn allocate(&self) -> Inode { + self.next_inode.fetch_add(1, Ordering::Relaxed) } } diff --git a/src/fs/icache/mod.rs b/src/fs/icache/mod.rs index 91f8bdd..2ccd80b 100644 --- a/src/fs/icache/mod.rs +++ b/src/fs/icache/mod.rs @@ -1,16 +1,21 @@ //! Generic directory cache and inode management primitives. +pub mod async_cache; pub mod bridge; -mod cache; +mod file_table; mod inode_factory; -pub use cache::ICache; +pub use async_cache::AsyncICache; +pub use async_cache::IcbResolver; +pub use file_table::FileTable; pub use inode_factory::InodeFactory; /// Common interface for inode control block types usable with `ICache`. -pub trait IcbLike { +pub trait IcbLike: Clone { /// Create an ICB with rc=1, the given path, and no children. fn new_root(path: std::path::PathBuf) -> Self; fn rc(&self) -> u64; fn rc_mut(&mut self) -> &mut u64; + /// Returns true if this entry needs resolution (e.g., attr not yet fetched). 
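+ ///
+ /// Stub entries created during lookup/readdir translation (path and parent
+ /// known, attributes not yet fetched) are expected to return `true` until
+ /// `AsyncICache::get_or_resolve` upgrades them via the resolver.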
+ fn needs_resolve(&self) -> bool; } diff --git a/src/fs/mescloud/common.rs b/src/fs/mescloud/common.rs index c0f6f6e..340b588 100644 --- a/src/fs/mescloud/common.rs +++ b/src/fs/mescloud/common.rs @@ -3,6 +3,8 @@ use mesa_dev::low_level::apis; use thiserror::Error; +use crate::fs::r#trait::{FileAttr, Inode}; + pub(super) use super::icache::InodeControlBlock; /// A concrete error type that preserves the structure of `mesa_dev::low_level::apis::Error` @@ -137,6 +139,15 @@ pub enum ReadDirError { NotPermitted, } +impl From for ReadDirError { + fn from(e: LookupError) -> Self { + match e { + LookupError::RemoteMesaError(api) => Self::RemoteMesaError(api), + LookupError::InodeNotFound | LookupError::FileDoesNotExist => Self::InodeNotFound, + } + } +} + impl From for i32 { fn from(e: ReadDirError) -> Self { match e { @@ -161,3 +172,36 @@ impl From for i32 { } } } + +/// Allows a parent compositor to peek at cached attrs from a child filesystem. +#[async_trait::async_trait] +pub(super) trait InodeCachePeek { + async fn peek_attr(&self, ino: Inode) -> Option; +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn lookup_inode_not_found_converts_to_readdir_inode_not_found() { + let err: ReadDirError = LookupError::InodeNotFound.into(); + assert!(matches!(err, ReadDirError::InodeNotFound)); + } + + #[test] + fn lookup_file_does_not_exist_converts_to_readdir_inode_not_found() { + let err: ReadDirError = LookupError::FileDoesNotExist.into(); + assert!(matches!(err, ReadDirError::InodeNotFound)); + } + + #[test] + fn lookup_remote_error_converts_to_readdir_remote_error() { + let api_err = MesaApiError::Response { + status: 500, + body: "test".to_owned(), + }; + let err: ReadDirError = LookupError::RemoteMesaError(api_err).into(); + assert!(matches!(err, ReadDirError::RemoteMesaError(_))); + } +} diff --git a/src/fs/mescloud/composite.rs b/src/fs/mescloud/composite.rs new file mode 100644 index 0000000..6dbac25 --- /dev/null +++ b/src/fs/mescloud/composite.rs @@ -0,0 +1,308 @@ +use std::collections::HashMap; +use std::ffi::OsStr; + +use bytes::Bytes; +use tracing::{instrument, trace, warn}; + +use crate::fs::icache::bridge::HashMapBridge; +use crate::fs::icache::{FileTable, IcbResolver}; +use crate::fs::r#trait::{ + DirEntry, FileAttr, FileHandle, FilesystemStats, Fs, Inode, LockOwner, OpenFile, OpenFlags, +}; + +use super::common::{ + GetAttrError, InodeCachePeek, LookupError, OpenError, ReadDirError, ReadError, ReleaseError, +}; +use super::icache::{InodeControlBlock, MescloudICache}; + +/// A child filesystem slot: inner filesystem + bidirectional inode/fh bridge. +pub(super) struct ChildSlot { + pub inner: Inner, + pub bridge: HashMapBridge, +} + +/// Layered filesystem that presents multiple child filesystems under a single +/// inode namespace. +/// +/// `MesaCloud`'s filesystem is a hierarchy of compositions: +/// +/// ```text +/// MesaFS (CompositeFs<_, OrgFs>) +/// └─ OrgFs (CompositeFs<_, RepoFs>) +/// └─ RepoFs (leaf — backed by git) +/// ``` +/// +/// Each child filesystem numbers its inodes starting from 1, so the composite +/// maintains a bidirectional inode/file-handle bridge per child (see +/// [`ChildSlot`]) to translate between the outer namespace visible to FUSE and +/// each child's internal namespace. +pub(super) struct CompositeFs +where + R: IcbResolver, +{ + pub icache: MescloudICache, + pub file_table: FileTable, + pub readdir_buf: Vec, + /// Maps outer inode to index into `slots` for child-root inodes. 
+ pub child_inodes: HashMap, + /// Maps every translated outer inode to its owning slot index. + pub inode_to_slot: HashMap, + pub slots: Vec>, +} + +impl CompositeFs +where + R: IcbResolver, + Inner: Fs< + LookupError = LookupError, + GetAttrError = GetAttrError, + OpenError = OpenError, + ReadError = ReadError, + ReaddirError = ReadDirError, + ReleaseError = ReleaseError, + > + InodeCachePeek + + Send + + Sync, +{ + /// Look up which child slot owns an inode via direct map. + #[instrument(name = "CompositeFs::slot_for_inode", skip(self))] + pub fn slot_for_inode(&self, ino: Inode) -> Option { + self.inode_to_slot.get(&ino).copied() + } + + /// Allocate an outer file handle and map it through the bridge. + #[must_use] + pub fn alloc_fh(&mut self, slot_idx: usize, inner_fh: FileHandle) -> FileHandle { + let fh = self.file_table.allocate(); + self.slots[slot_idx].bridge.insert_fh(fh, inner_fh); + fh + } + + /// Translate an inner inode to an outer inode, allocating if needed. + /// Also inserts a stub ICB into the outer icache when the inode is new. + #[instrument(name = "CompositeFs::translate_inner_ino", skip(self, name))] + pub async fn translate_inner_ino( + &mut self, + slot_idx: usize, + inner_ino: Inode, + parent_outer_ino: Inode, + name: &OsStr, + ) -> Inode { + let outer_ino = self.slots[slot_idx] + .bridge + .backward_or_insert_inode(inner_ino, || self.icache.allocate_inode()); + self.inode_to_slot.insert(outer_ino, slot_idx); + self.icache + .entry_or_insert_icb( + outer_ino, + || InodeControlBlock { + rc: 0, + path: name.into(), + parent: Some(parent_outer_ino), + attr: None, + children: None, + }, + |_| {}, + ) + .await; + outer_ino + } + + /// Get cached file attributes for an inode. + #[instrument(name = "CompositeFs::delegated_getattr", skip(self))] + pub async fn delegated_getattr(&self, ino: Inode) -> Result { + self.icache.get_attr(ino).await.ok_or_else(|| { + warn!(ino, "getattr on unknown inode"); + GetAttrError::InodeNotFound + }) + } + + /// Find slot, forward inode, delegate to inner, allocate outer file handle. + #[instrument(name = "CompositeFs::delegated_open", skip(self))] + pub async fn delegated_open( + &mut self, + ino: Inode, + flags: OpenFlags, + ) -> Result { + let idx = self.slot_for_inode(ino).ok_or_else(|| { + warn!(ino, "open on inode not belonging to any child"); + OpenError::InodeNotFound + })?; + let inner_ino = self.slots[idx] + .bridge + .forward_or_insert_inode(ino, || unreachable!("open: ino should be mapped")); + let inner_open = self.slots[idx].inner.open(inner_ino, flags).await?; + let outer_fh = self.alloc_fh(idx, inner_open.handle); + trace!( + ino, + outer_fh, + inner_fh = inner_open.handle, + "open: assigned file handle" + ); + Ok(OpenFile { + handle: outer_fh, + options: inner_open.options, + }) + } + + /// Find slot, forward inode and file handle, delegate read to inner. 
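`translate_inner_ino` depends on the per-slot bridge keeping a two-way mapping between outer and inner inode numbers. Only part of `HashMapBridge`'s API is visible in this diff, so the sketch below is a hypothetical, simplified bridge showing just the forward/backward bookkeeping, not the real type:

```rust
use std::collections::HashMap;

/// Hypothetical, simplified bridge: two maps kept in sync so translation works
/// in both directions (outer -> inner for delegation, inner -> outer for results).
struct InodeBridge {
    outer_to_inner: HashMap<u64, u64>,
    inner_to_outer: HashMap<u64, u64>,
}

impl InodeBridge {
    fn new() -> Self {
        Self { outer_to_inner: HashMap::new(), inner_to_outer: HashMap::new() }
    }

    fn insert(&mut self, outer: u64, inner: u64) {
        self.outer_to_inner.insert(outer, inner);
        self.inner_to_outer.insert(inner, outer);
    }

    /// Map an inner inode back to the outer namespace, allocating an outer
    /// number (via the provided closure) the first time it is seen.
    fn backward_or_insert(&mut self, inner: u64, alloc: impl FnOnce() -> u64) -> u64 {
        if let Some(&outer) = self.inner_to_outer.get(&inner) {
            return outer;
        }
        let outer = alloc();
        self.insert(outer, inner);
        outer
    }
}

fn main() {
    let mut bridge = InodeBridge::new();
    let mut next_outer = 100;
    let outer = bridge.backward_or_insert(1, || { next_outer += 1; next_outer });
    assert_eq!(outer, 101);
    // Translating the same inner inode again reuses the existing mapping.
    assert_eq!(bridge.backward_or_insert(1, || unreachable!()), 101);
}
```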
+ #[expect(clippy::too_many_arguments, reason = "mirrors fuser read API")] + #[instrument(name = "CompositeFs::delegated_read", skip(self))] + pub async fn delegated_read( + &mut self, + ino: Inode, + fh: FileHandle, + offset: u64, + size: u32, + flags: OpenFlags, + lock_owner: Option, + ) -> Result { + let idx = self.slot_for_inode(ino).ok_or_else(|| { + warn!(ino, "read on inode not belonging to any child"); + ReadError::InodeNotFound + })?; + let inner_ino = self.slots[idx] + .bridge + .forward_or_insert_inode(ino, || unreachable!("read: ino should be mapped")); + let inner_fh = self.slots[idx].bridge.fh_forward(fh).ok_or_else(|| { + warn!(fh, "read: no fh mapping found"); + ReadError::FileNotOpen + })?; + self.slots[idx] + .inner + .read(inner_ino, inner_fh, offset, size, flags, lock_owner) + .await + } + + /// Find slot, forward inode and file handle, delegate release to inner, + /// then clean up the file handle mapping. + #[instrument(name = "CompositeFs::delegated_release", skip(self))] + pub async fn delegated_release( + &mut self, + ino: Inode, + fh: FileHandle, + flags: OpenFlags, + flush: bool, + ) -> Result<(), ReleaseError> { + let idx = self.slot_for_inode(ino).ok_or_else(|| { + warn!(ino, "release on inode not belonging to any child"); + ReleaseError::FileNotOpen + })?; + let inner_ino = self.slots[idx] + .bridge + .forward_or_insert_inode(ino, || unreachable!("release: ino should be mapped")); + let inner_fh = self.slots[idx].bridge.fh_forward(fh).ok_or_else(|| { + warn!(fh, "release: no fh mapping found"); + ReleaseError::FileNotOpen + })?; + let result = self.slots[idx] + .inner + .release(inner_ino, inner_fh, flags, flush) + .await; + self.slots[idx].bridge.remove_fh_by_left(fh); + trace!(ino, fh, "release: cleaned up fh mapping"); + result + } + + /// Propagate forget to the inner filesystem, evict from icache, and clean + /// up bridge mappings. Returns `true` if the inode was evicted. + /// + /// Child-root inodes (those in `child_inodes`) do NOT propagate forget to + /// the inner filesystem: the inner root's `rc=1` is an initialization + /// invariant unrelated to outer FUSE lookup counts. Propagating would + /// evict the inner root, breaking all subsequent operations on that child. + #[must_use] + #[instrument(name = "CompositeFs::delegated_forget", skip(self))] + pub async fn delegated_forget(&mut self, ino: Inode, nlookups: u64) -> bool { + let slot_idx = self.slot_for_inode(ino); + let is_child_root = self.child_inodes.contains_key(&ino); + if !is_child_root + && let Some(idx) = slot_idx + && let Some(&inner_ino) = self.slots[idx].bridge.inode_map_get_by_left(ino) + { + self.slots[idx].inner.forget(inner_ino, nlookups).await; + } + if self.icache.forget(ino, nlookups).await.is_some() { + self.child_inodes.remove(&ino); + self.inode_to_slot.remove(&ino); + if let Some(idx) = slot_idx { + self.slots[idx].bridge.remove_inode_by_left(ino); + } + true + } else { + false + } + } + + /// Return filesystem statistics from the icache. + #[must_use] + pub fn delegated_statfs(&self) -> FilesystemStats { + self.icache.statfs() + } + + /// Delegation branch for lookup when the parent is owned by a child slot. 
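`delegated_forget` follows the kernel's lookup-count protocol: each successful lookup adds one reference, `forget(ino, nlookups)` hands back that many at once, and the entry is evicted when the count is used up. A minimal sketch of the accounting, independent of the real cache types:

```rust
use std::collections::HashMap;

struct LookupCache {
    rc: HashMap<u64, u64>,
}

impl LookupCache {
    fn lookup(&mut self, ino: u64) {
        // Each lookup the kernel sees adds one reference it will later forget.
        *self.rc.entry(ino).or_insert(0) += 1;
    }

    /// Returns true if the entry was evicted.
    fn forget(&mut self, ino: u64, nlookups: u64) -> bool {
        match self.rc.get_mut(&ino) {
            Some(rc) if *rc <= nlookups => {
                self.rc.remove(&ino);
                true
            }
            Some(rc) => {
                *rc -= nlookups;
                false
            }
            None => false,
        }
    }
}

fn main() {
    let mut cache = LookupCache { rc: HashMap::new() };
    cache.lookup(7);
    cache.lookup(7);
    assert!(!cache.forget(7, 1)); // one reference still outstanding
    assert!(cache.forget(7, 1)); // now evicted
}
```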
+ #[instrument(name = "CompositeFs::delegated_lookup", skip(self, name))] + pub async fn delegated_lookup( + &mut self, + parent: Inode, + name: &OsStr, + ) -> Result { + let idx = self + .slot_for_inode(parent) + .ok_or(LookupError::InodeNotFound)?; + let inner_parent = self.slots[idx] + .bridge + .forward_or_insert_inode(parent, || unreachable!("lookup: parent should be mapped")); + let inner_attr = self.slots[idx].inner.lookup(inner_parent, name).await?; + let inner_ino = inner_attr.common().ino; + let outer_ino = self.translate_inner_ino(idx, inner_ino, parent, name).await; + let outer_attr = self.slots[idx].bridge.attr_backward(inner_attr); + self.icache.cache_attr(outer_ino, outer_attr).await; + // None means the entry was concurrently evicted; fail the lookup so + // the kernel doesn't hold a ref the cache no longer tracks. + let rc = self + .icache + .inc_rc(outer_ino) + .await + .ok_or(LookupError::InodeNotFound)?; + trace!(outer_ino, inner_ino, rc, "lookup: resolved via delegation"); + Ok(outer_attr) + } + + /// Delegation branch for readdir when the inode is owned by a child slot. + #[instrument(name = "CompositeFs::delegated_readdir", skip(self))] + pub async fn delegated_readdir(&mut self, ino: Inode) -> Result<&[DirEntry], ReadDirError> { + let idx = self + .slot_for_inode(ino) + .ok_or(ReadDirError::InodeNotFound)?; + let inner_ino = self.slots[idx] + .bridge + .forward_or_insert_inode(ino, || unreachable!("readdir: ino should be mapped")); + let inner_entries = self.slots[idx].inner.readdir(inner_ino).await?; + let inner_entries: Vec = inner_entries.to_vec(); + let evicted = self.icache.evict_zero_rc_children(ino).await; + for evicted_ino in evicted { + if let Some(slot) = self.inode_to_slot.remove(&evicted_ino) { + self.slots[slot].bridge.remove_inode_by_left(evicted_ino); + } + self.child_inodes.remove(&evicted_ino); + } + let mut outer_entries = Vec::with_capacity(inner_entries.len()); + for entry in &inner_entries { + let outer_child_ino = self + .translate_inner_ino(idx, entry.ino, ino, &entry.name) + .await; + if let Some(inner_attr) = self.slots[idx].inner.peek_attr(entry.ino).await { + let outer_attr = self.slots[idx].bridge.attr_backward(inner_attr); + self.icache.cache_attr(outer_child_ino, outer_attr).await; + } + outer_entries.push(DirEntry { + ino: outer_child_ino, + name: entry.name.clone(), + kind: entry.kind, + }); + } + self.readdir_buf = outer_entries; + Ok(&self.readdir_buf) + } +} diff --git a/src/fs/mescloud/icache.rs b/src/fs/mescloud/icache.rs index d6d9ab9..15f1f5d 100644 --- a/src/fs/mescloud/icache.rs +++ b/src/fs/mescloud/icache.rs @@ -3,22 +3,21 @@ use std::ffi::OsStr; use std::time::SystemTime; -use tracing::warn; - -use crate::fs::icache::{ICache, IcbLike, InodeFactory}; +use crate::fs::icache::{AsyncICache, IcbLike, IcbResolver, InodeFactory}; use crate::fs::r#trait::{ - CommonFileAttr, DirEntry, DirEntryType, FileAttr, FilesystemStats, Inode, Permissions, + CommonFileAttr, DirEntryType, FileAttr, FilesystemStats, Inode, Permissions, }; -/// Inode control block for mescloud filesystem layers (`MesaFS`, `OrgFs`, `RepoFs`). +/// Inode control block for mescloud filesystem layers. +#[derive(Clone)] pub struct InodeControlBlock { - /// The root inode doesn't have a parent. pub parent: Option, pub rc: u64, pub path: std::path::PathBuf, - pub children: Option>, /// Cached file attributes from the last lookup. pub attr: Option, + /// Cached directory children from the resolver (directories only). 
+ pub children: Option>, } impl IcbLike for InodeControlBlock { @@ -27,8 +26,8 @@ impl IcbLike for InodeControlBlock { rc: 1, parent: None, path, - children: None, attr: None, + children: None, } } @@ -39,6 +38,14 @@ impl IcbLike for InodeControlBlock { fn rc_mut(&mut self) -> &mut u64 { &mut self.rc } + + fn needs_resolve(&self) -> bool { + match self.attr { + None => true, + Some(FileAttr::Directory { .. }) => self.children.is_none(), + Some(_) => false, + } + } } /// Calculate the number of blocks needed for a given size. @@ -46,151 +53,130 @@ pub fn blocks_of_size(block_size: u32, size: u64) -> u64 { size.div_ceil(u64::from(block_size)) } -/// Mescloud-specific directory cache. -/// -/// Wraps [`ICache`] and adds inode allocation, attribute -/// caching, `ensure_child_inode`, and filesystem metadata. -pub struct MescloudICache { - inner: ICache, - inode_factory: InodeFactory, +/// Free function -- usable by both `MescloudICache` and resolvers. +pub fn make_common_file_attr( + ino: Inode, + perm: u16, + atime: SystemTime, + mtime: SystemTime, fs_owner: (u32, u32), block_size: u32, -} - -impl std::ops::Deref for MescloudICache { - type Target = ICache; - fn deref(&self) -> &Self::Target { - &self.inner +) -> CommonFileAttr { + CommonFileAttr { + ino, + atime, + mtime, + ctime: SystemTime::UNIX_EPOCH, + crtime: SystemTime::UNIX_EPOCH, + perm: Permissions::from_bits_truncate(perm), + nlink: 1, + uid: fs_owner.0, + gid: fs_owner.1, + blksize: block_size, } } -impl std::ops::DerefMut for MescloudICache { - fn deref_mut(&mut self) -> &mut Self::Target { - &mut self.inner - } +/// Mescloud-specific directory cache wrapper over `AsyncICache`. +pub struct MescloudICache> { + inner: AsyncICache, + inode_factory: InodeFactory, + fs_owner: (u32, u32), + block_size: u32, } -impl MescloudICache { +impl> MescloudICache { /// Create a new `MescloudICache`. Initializes root ICB (rc=1), caches root dir attr. - pub fn new(root_ino: Inode, fs_owner: (u32, u32), block_size: u32) -> Self { - let mut icache = Self { - inner: ICache::new(root_ino, "/"), + pub fn new(resolver: R, root_ino: Inode, fs_owner: (u32, u32), block_size: u32) -> Self { + let cache = Self { + inner: AsyncICache::new(resolver, root_ino, "/"), inode_factory: InodeFactory::new(root_ino + 1), fs_owner, block_size, }; + // Set root directory attr synchronously during initialization let now = SystemTime::now(); let root_attr = FileAttr::Directory { - common: icache.make_common_file_attr(root_ino, 0o755, now, now), + common: make_common_file_attr(root_ino, 0o755, now, now, fs_owner, block_size), }; - icache.cache_attr(root_ino, root_attr); - icache - } + cache.inner.get_icb_mut_sync(root_ino, |icb| { + icb.attr = Some(root_attr); + }); - /// Allocate a new inode number. - pub fn allocate_inode(&mut self) -> Inode { - self.inode_factory.allocate() + cache } - pub fn get_attr(&self, ino: Inode) -> Option { - self.inner.get_icb(ino).and_then(|icb| icb.attr) + // -- Delegated from AsyncICache (async) -- + + pub fn contains(&self, ino: Inode) -> bool { + self.inner.contains(ino) } - pub fn cache_attr(&mut self, ino: Inode, attr: FileAttr) { - if let Some(icb) = self.inner.get_icb_mut(ino) { - icb.attr = Some(attr); - } + pub async fn get_icb( + &self, + ino: Inode, + // `Sync` required: see comment on `AsyncICache::get_icb`. + f: impl Fn(&InodeControlBlock) -> T + Send + Sync, + ) -> Option { + self.inner.get_icb(ino, f).await } - /// Ensure a child inode exists under `parent` with the given `name` and `kind`. 
- /// Reuses existing inode if present. Does NOT bump rc. - pub fn ensure_child_inode( - &mut self, - parent: Inode, - name: &OsStr, - kind: DirEntryType, - ) -> (Inode, FileAttr) { - // Check existing child by parent + name. - let existing = self - .inner - .iter() - .find(|&(&_ino, icb)| icb.parent == Some(parent) && icb.path.as_os_str() == name) - .map(|(&ino, _)| ino); - - if let Some(existing_ino) = existing { - if let Some(attr) = self.inner.get_icb(existing_ino).and_then(|icb| icb.attr) { - return (existing_ino, attr); - } + pub async fn insert_icb(&self, ino: Inode, icb: InodeControlBlock) { + self.inner.insert_icb(ino, icb).await; + } - warn!(ino = existing_ino, parent, name = ?name, ?kind, - "ensure_child_inode: attr missing on existing inode, rebuilding"); - let attr = self.make_attr_for_kind(existing_ino, kind); - self.cache_attr(existing_ino, attr); - return (existing_ino, attr); - } + pub async fn entry_or_insert_icb( + &self, + ino: Inode, + factory: impl FnOnce() -> InodeControlBlock, + then: impl FnOnce(&mut InodeControlBlock) -> T, + ) -> T { + self.inner.entry_or_insert_icb(ino, factory, then).await + } - let ino = self.inode_factory.allocate(); - self.inner.insert_icb( - ino, - InodeControlBlock { - rc: 0, - path: name.into(), - parent: Some(parent), - children: None, - attr: None, - }, - ); + pub async fn inc_rc(&self, ino: Inode) -> Option { + self.inner.inc_rc(ino).await + } - let attr = self.make_attr_for_kind(ino, kind); - self.cache_attr(ino, attr); - (ino, attr) + pub async fn forget(&self, ino: Inode, nlookups: u64) -> Option { + self.inner.forget(ino, nlookups).await } - pub fn make_common_file_attr( + pub async fn get_or_resolve( &self, ino: Inode, - perm: u16, - atime: SystemTime, - mtime: SystemTime, - ) -> CommonFileAttr { - CommonFileAttr { - ino, - atime, - mtime, - ctime: SystemTime::UNIX_EPOCH, - crtime: SystemTime::UNIX_EPOCH, - perm: Permissions::from_bits_truncate(perm), - nlink: 1, - uid: self.fs_owner.0, - gid: self.fs_owner.1, - blksize: self.block_size, - } + then: impl FnOnce(&InodeControlBlock) -> T, + ) -> Result { + self.inner.get_or_resolve(ino, then).await } - fn make_attr_for_kind(&self, ino: Inode, kind: DirEntryType) -> FileAttr { - let now = SystemTime::now(); - match kind { - DirEntryType::Directory => FileAttr::Directory { - common: self.make_common_file_attr(ino, 0o755, now, now), - }, - DirEntryType::RegularFile - | DirEntryType::Symlink - | DirEntryType::CharDevice - | DirEntryType::BlockDevice - | DirEntryType::NamedPipe - | DirEntryType::Socket => FileAttr::RegularFile { - common: self.make_common_file_attr(ino, 0o644, now, now), - size: 0, - blocks: 0, - }, - } + // -- Domain-specific -- + + /// Allocate a new inode number. + pub fn allocate_inode(&self) -> Inode { + self.inode_factory.allocate() + } + + pub async fn get_attr(&self, ino: Inode) -> Option { + self.inner.get_icb(ino, |icb| icb.attr).await.flatten() + } + + pub async fn cache_attr(&self, ino: Inode, attr: FileAttr) { + self.inner + .get_icb_mut(ino, |icb| { + icb.attr = Some(attr); + }) + .await; } pub fn fs_owner(&self) -> (u32, u32) { self.fs_owner } + pub fn block_size(&self) -> u32 { + self.block_size + } + pub fn statfs(&self) -> FilesystemStats { FilesystemStats { block_size: self.block_size, @@ -206,4 +192,246 @@ impl MescloudICache { max_filename_length: 255, } } + + /// Evict all `Available` children of `parent` that have `rc == 0`. 
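`get_or_resolve` is where the stub-ICB model meets the resolver: an entry whose `needs_resolve()` is true gets filled in lazily before the caller's closure runs. The sketch below shows that shape with synchronous, stand-in types; the real `AsyncICache` API is async and its exact signatures are not fully visible in this diff.

```rust
use std::collections::HashMap;

#[derive(Clone)]
struct Icb {
    attr: Option<String>, // stand-in for cached file attributes
}

impl Icb {
    fn needs_resolve(&self) -> bool {
        self.attr.is_none()
    }
}

trait Resolver {
    fn resolve(&self, ino: u64, stub: Icb) -> Result<Icb, String>;
}

struct Cache<R: Resolver> {
    resolver: R,
    table: HashMap<u64, Icb>,
}

impl<R: Resolver> Cache<R> {
    /// Resolve the entry if needed, then run `then` against the resolved ICB.
    fn get_or_resolve<T>(&mut self, ino: u64, then: impl FnOnce(&Icb) -> T) -> Result<T, String> {
        let entry = self.table.get(&ino).cloned().ok_or("unknown inode".to_owned())?;
        let entry = if entry.needs_resolve() {
            let resolved = self.resolver.resolve(ino, entry)?;
            self.table.insert(ino, resolved.clone());
            resolved
        } else {
            entry
        };
        Ok(then(&entry))
    }
}

struct DirResolver;

impl Resolver for DirResolver {
    fn resolve(&self, _ino: u64, mut stub: Icb) -> Result<Icb, String> {
        stub.attr = Some("drwxr-xr-x".to_owned());
        Ok(stub)
    }
}

fn main() {
    let mut cache = Cache { resolver: DirResolver, table: HashMap::new() };
    cache.table.insert(1, Icb { attr: None });
    let attr = cache.get_or_resolve(1, |icb| icb.attr.clone()).unwrap();
    assert_eq!(attr.as_deref(), Some("drwxr-xr-x"));
}
```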
+ /// Returns the list of evicted inode numbers so callers can clean up + /// associated state (e.g., bridge mappings, slot tracking). + pub async fn evict_zero_rc_children(&self, parent: Inode) -> Vec { + let mut to_evict = Vec::new(); + self.inner + .for_each(|&ino, icb| { + if icb.rc == 0 && icb.parent == Some(parent) { + to_evict.push(ino); + } + }) + .await; + let mut evicted = Vec::new(); + for ino in to_evict { + if self.inner.forget(ino, 0).await.is_some() { + evicted.push(ino); + } + } + evicted + } + + /// Find an existing child by (parent, name) or allocate a new inode. + /// If new, inserts a stub ICB (parent+path set, attr=None, children=None, rc=0). + /// Does NOT bump rc. Returns the inode number. + /// + /// # Safety invariant + /// + /// The `for_each` scan and `insert_icb` are **not** atomic. If two callers + /// race with the same `(parent, name)`, both may allocate distinct inodes + /// for the same logical child. This is currently safe because all callers + /// go through `&mut self` on the owning `Fs` implementation. + pub async fn ensure_child_ino(&self, parent: Inode, name: &OsStr) -> Inode { + // Search for existing child by parent + name + let mut existing_ino = None; + self.inner + .for_each(|&ino, icb| { + if icb.parent == Some(parent) && icb.path.as_os_str() == name { + existing_ino = Some(ino); + } + }) + .await; + + if let Some(ino) = existing_ino { + return ino; + } + + // Allocate new inode and insert stub + let ino = self.inode_factory.allocate(); + self.inner + .insert_icb( + ino, + InodeControlBlock { + rc: 0, + path: name.into(), + parent: Some(parent), + attr: None, + children: None, + }, + ) + .await; + ino + } +} + +#[cfg(test)] +mod tests { + use std::future::Future; + + use super::*; + use crate::fs::icache::async_cache::AsyncICache; + use crate::fs::r#trait::DirEntryType; + + fn dummy_dir_attr(ino: Inode) -> FileAttr { + let now = SystemTime::now(); + FileAttr::Directory { + common: make_common_file_attr(ino, 0o755, now, now, (0, 0), 4096), + } + } + + fn dummy_file_attr(ino: Inode) -> FileAttr { + let now = SystemTime::now(); + FileAttr::RegularFile { + common: make_common_file_attr(ino, 0o644, now, now, (0, 0), 4096), + size: 100, + blocks: 1, + } + } + + #[test] + fn needs_resolve_stub_returns_true() { + let icb = InodeControlBlock { + parent: Some(1), + rc: 0, + path: "stub".into(), + attr: None, + children: None, + }; + assert!(icb.needs_resolve()); + } + + #[test] + fn needs_resolve_file_with_attr_returns_false() { + let icb = InodeControlBlock { + parent: Some(1), + rc: 1, + path: "file.txt".into(), + attr: Some(dummy_file_attr(2)), + children: None, + }; + assert!(!icb.needs_resolve()); + } + + #[test] + fn needs_resolve_dir_without_children_returns_true() { + let icb = InodeControlBlock { + parent: Some(1), + rc: 1, + path: "dir".into(), + attr: Some(dummy_dir_attr(3)), + children: None, + }; + assert!(icb.needs_resolve()); + } + + #[test] + fn needs_resolve_dir_with_children_returns_false() { + let icb = InodeControlBlock { + parent: Some(1), + rc: 1, + path: "dir".into(), + attr: Some(dummy_dir_attr(3)), + children: Some(vec![("README.md".to_owned(), DirEntryType::RegularFile)]), + }; + assert!(!icb.needs_resolve()); + } + + #[test] + fn needs_resolve_dir_with_empty_children_returns_false() { + let icb = InodeControlBlock { + parent: Some(1), + rc: 1, + path: "empty-dir".into(), + attr: Some(dummy_dir_attr(4)), + children: Some(vec![]), + }; + assert!(!icb.needs_resolve()); + } + + struct NoOpResolver; + + impl IcbResolver for 
NoOpResolver { + type Icb = InodeControlBlock; + type Error = std::convert::Infallible; + + #[expect( + clippy::manual_async_fn, + reason = "must match IcbResolver trait signature" + )] + fn resolve( + &self, + _ino: Inode, + _stub: Option, + _cache: &AsyncICache, + ) -> impl Future> + Send { + async { unreachable!("NoOpResolver should not be called") } + } + } + + fn test_mescloud_cache() -> MescloudICache { + MescloudICache::new(NoOpResolver, 1, (0, 0), 4096) + } + + #[tokio::test] + async fn evict_zero_rc_children_removes_stubs() { + let cache = test_mescloud_cache(); + + // Insert stubs as children of root (ino=1) with rc=0 + cache + .insert_icb( + 10, + InodeControlBlock { + rc: 0, + path: "child_a".into(), + parent: Some(1), + attr: None, + children: None, + }, + ) + .await; + cache + .insert_icb( + 11, + InodeControlBlock { + rc: 0, + path: "child_b".into(), + parent: Some(1), + attr: None, + children: None, + }, + ) + .await; + + // Insert a child with rc > 0 — should survive + cache + .insert_icb( + 12, + InodeControlBlock { + rc: 1, + path: "active".into(), + parent: Some(1), + attr: None, + children: None, + }, + ) + .await; + + // Insert a stub under a different parent — should survive + cache + .insert_icb( + 20, + InodeControlBlock { + rc: 0, + path: "other".into(), + parent: Some(12), + attr: None, + children: None, + }, + ) + .await; + + let evicted = cache.evict_zero_rc_children(1).await; + assert_eq!(evicted.len(), 2, "should evict 2 zero-rc children of root"); + + assert!(!cache.contains(10), "child_a should be evicted"); + assert!(!cache.contains(11), "child_b should be evicted"); + assert!(cache.contains(12), "active child should survive"); + assert!( + cache.contains(20), + "child of different parent should survive" + ); + } } diff --git a/src/fs/mescloud/mod.rs b/src/fs/mescloud/mod.rs index 64ea0ad..0e32933 100644 --- a/src/fs/mescloud/mod.rs +++ b/src/fs/mescloud/mod.rs @@ -1,26 +1,33 @@ use std::collections::HashMap; use std::ffi::OsStr; +use std::future::Future; +use std::time::SystemTime; use bytes::Bytes; use mesa_dev::MesaClient; use secrecy::ExposeSecret as _; -use tracing::{instrument, trace, warn}; +use tracing::{Instrument as _, instrument, trace, warn}; use crate::fs::icache::bridge::HashMapBridge; +use crate::fs::icache::{AsyncICache, FileTable, IcbResolver}; use crate::fs::r#trait::{ DirEntry, DirEntryType, FileAttr, FileHandle, FilesystemStats, Fs, Inode, LockOwner, OpenFile, OpenFlags, }; +use composite::{ChildSlot, CompositeFs}; + #[cfg(feature = "staging")] const MESA_API_BASE_URL: &str = "https://staging.depot.mesa.dev/api/v1"; #[cfg(not(feature = "staging"))] const MESA_API_BASE_URL: &str = "https://depot.mesa.dev/api/v1"; mod common; +mod composite; use common::InodeControlBlock; pub use common::{GetAttrError, LookupError, OpenError, ReadDirError, ReadError, ReleaseError}; +use icache as mescloud_icache; use icache::MescloudICache; mod org; @@ -30,10 +37,48 @@ use org::OrgFs; pub mod icache; pub mod repo; -/// Per-org wrapper with inode and file handle translation. 
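The test-only `NoOpResolver` pairs `Error = std::convert::Infallible` with an `unreachable!()` body, which statically documents that resolution can never fail in those tests. A tiny standalone illustration of why `Infallible` is convenient for that:

```rust
use std::convert::Infallible;

// An API whose error type is Infallible can never actually fail, so callers
// may match away the Err arm entirely.
fn always_ok(x: u64) -> Result<u64, Infallible> {
    Ok(x * 2)
}

fn main() {
    let doubled = match always_ok(21) {
        Ok(v) => v,
        Err(never) => match never {}, // uninhabited: this arm cannot be reached
    };
    assert_eq!(doubled, 42);
}
```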
-struct OrgSlot { - org: OrgFs, - bridge: HashMapBridge, // left = mesa, right = org +struct MesaResolver { + fs_owner: (u32, u32), + block_size: u32, +} + +impl IcbResolver for MesaResolver { + type Icb = InodeControlBlock; + type Error = std::convert::Infallible; + + fn resolve( + &self, + ino: Inode, + stub: Option, + _cache: &AsyncICache, + ) -> impl Future> + Send + where + Self: Sized, + { + let fs_owner = self.fs_owner; + let block_size = self.block_size; + async move { + let stub = stub.unwrap_or_else(|| InodeControlBlock { + parent: None, + path: "/".into(), + rc: 0, + attr: None, + children: None, + }); + let now = SystemTime::now(); + let attr = FileAttr::Directory { + common: mescloud_icache::make_common_file_attr( + ino, 0o755, now, now, fs_owner, block_size, + ), + }; + Ok(InodeControlBlock { + attr: Some(attr), + children: Some(vec![]), + ..stub + }) + } + .instrument(tracing::info_span!("MesaResolver::resolve", ino)) + } } /// Classifies an inode by its role in the mesa hierarchy. @@ -41,19 +86,15 @@ enum InodeRole { /// The filesystem root (ino == 1). Root, /// An inode owned by some org. - OrgOwned { idx: usize }, + OrgOwned, } /// The top-level `MesaFS` filesystem. /// /// Composes multiple [`OrgFs`] instances, each with its own inode namespace, -/// using [`HashMapBridge`] for bidirectional inode/fh translation at each boundary. +/// delegating to [`CompositeFs`] for inode/fh translation at each boundary. pub struct MesaFS { - icache: MescloudICache, - - /// Maps mesa-level org-root inodes → index into `org_slots`. - org_inodes: HashMap, - org_slots: Vec, + composite: CompositeFs, } impl MesaFS { @@ -61,53 +102,51 @@ impl MesaFS { const BLOCK_SIZE: u32 = 4096; /// Create a new `MesaFS` instance. + #[must_use] pub fn new(orgs: impl Iterator, fs_owner: (u32, u32)) -> Self { + let resolver = MesaResolver { + fs_owner, + block_size: Self::BLOCK_SIZE, + }; Self { - icache: MescloudICache::new(Self::ROOT_NODE_INO, fs_owner, Self::BLOCK_SIZE), - org_inodes: HashMap::new(), - org_slots: orgs - .map(|org_conf| { - let client = MesaClient::builder() - .with_api_key(org_conf.api_key.expose_secret()) - .with_base_path(MESA_API_BASE_URL) - .build(); - let org = OrgFs::new(org_conf.name, client, fs_owner); - OrgSlot { - org, - bridge: HashMapBridge::new(), - } - }) - .collect(), + composite: CompositeFs { + icache: MescloudICache::new( + resolver, + Self::ROOT_NODE_INO, + fs_owner, + Self::BLOCK_SIZE, + ), + file_table: FileTable::new(), + readdir_buf: Vec::new(), + child_inodes: HashMap::new(), + inode_to_slot: HashMap::new(), + slots: orgs + .map(|org_conf| { + let client = MesaClient::builder() + .with_api_key(org_conf.api_key.expose_secret()) + .with_base_path(MESA_API_BASE_URL) + .build(); + let org = OrgFs::new(org_conf.name, client, fs_owner); + ChildSlot { + inner: org, + bridge: HashMapBridge::new(), + } + }) + .collect(), + }, } } /// Classify an inode by its role. - fn inode_role(&self, ino: Inode) -> InodeRole { + fn inode_role(&self, ino: Inode) -> Option { if ino == Self::ROOT_NODE_INO { - return InodeRole::Root; - } - if let Some(&idx) = self.org_inodes.get(&ino) { - return InodeRole::OrgOwned { idx }; - } - // Walk parent chain. - if let Some(idx) = self.org_slot_for_inode(ino) { - return InodeRole::OrgOwned { idx }; + return Some(InodeRole::Root); } - debug_assert!(false, "inode {ino} not found in any org slot"); - InodeRole::Root - } - - /// Find the org slot index that owns `ino` by walking the parent chain. 
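`MesaResolver::resolve` attaches its tracing span with `.instrument(...)` rather than `#[instrument]` because the trait method returns an explicit `impl Future + Send` instead of being an `async fn`. A standalone sketch of that pattern; the span name and function here are examples, not project code:

```rust
use std::future::Future;
use tracing::Instrument as _;

// When a function hand-builds its future, wrapping it with `.instrument(span)`
// keeps the span entered across every poll of that future.
fn resolve_stub(ino: u64) -> impl Future<Output = u64> + Send {
    async move {
        tracing::trace!("resolving");
        ino + 1
    }
    .instrument(tracing::info_span!("resolve_stub", ino))
}

#[tokio::main]
async fn main() {
    tracing_subscriber::fmt().init();
    let resolved = resolve_stub(41).await;
    assert_eq!(resolved, 42);
}
```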
- fn org_slot_for_inode(&self, ino: Inode) -> Option { - if let Some(&idx) = self.org_inodes.get(&ino) { - return Some(idx); + if self.composite.child_inodes.contains_key(&ino) { + return Some(InodeRole::OrgOwned); } - let mut current = ino; - while let Some(parent) = self.icache.get_icb(current).and_then(|icb| icb.parent) { - if let Some(&idx) = self.org_inodes.get(&parent) { - return Some(idx); - } - current = parent; + if self.composite.slot_for_inode(ino).is_some() { + return Some(InodeRole::OrgOwned); } None } @@ -115,97 +154,101 @@ impl MesaFS { /// Ensure a mesa-level inode exists for the org at `org_idx`. /// Seeds the bridge with (`mesa_org_ino`, `OrgFs::ROOT_INO`). /// Does NOT bump rc. - fn ensure_org_inode(&mut self, org_idx: usize) -> (Inode, FileAttr) { + async fn ensure_org_inode(&mut self, org_idx: usize) -> (Inode, FileAttr) { // Check if an inode already exists. - if let Some((&existing_ino, _)) = self.org_inodes.iter().find(|&(_, &idx)| idx == org_idx) { - if let Some(icb) = self.icache.get_icb(existing_ino) - && let Some(attr) = icb.attr - { + let existing_ino = self + .composite + .child_inodes + .iter() + .find(|&(_, &idx)| idx == org_idx) + .map(|(&ino, _)| ino); + + if let Some(existing_ino) = existing_ino { + if let Some(attr) = self.composite.icache.get_attr(existing_ino).await { + let rc = self + .composite + .icache + .get_icb(existing_ino, |icb| icb.rc) + .await + .unwrap_or(0); trace!( ino = existing_ino, - org_idx, - rc = icb.rc, - "ensure_org_inode: reusing existing inode" + org_idx, rc, "ensure_org_inode: reusing existing inode" ); return (existing_ino, attr); } - // Attr missing — rebuild. + if self.composite.icache.contains(existing_ino) { + // ICB exists but attr missing — rebuild and cache. + warn!( + ino = existing_ino, + org_idx, "ensure_org_inode: attr missing, rebuilding" + ); + let now = SystemTime::now(); + let attr = FileAttr::Directory { + common: mescloud_icache::make_common_file_attr( + existing_ino, + 0o755, + now, + now, + self.composite.icache.fs_owner(), + self.composite.icache.block_size(), + ), + }; + self.composite.icache.cache_attr(existing_ino, attr).await; + return (existing_ino, attr); + } + // ICB was evicted — clean up stale tracking entries. warn!( ino = existing_ino, - org_idx, "ensure_org_inode: attr missing, rebuilding" + org_idx, "ensure_org_inode: ICB evicted, cleaning up stale entry" ); - let now = std::time::SystemTime::now(); - let attr = FileAttr::Directory { - common: self - .icache - .make_common_file_attr(existing_ino, 0o755, now, now), - }; - self.icache.cache_attr(existing_ino, attr); - return (existing_ino, attr); + self.composite.child_inodes.remove(&existing_ino); + self.composite.inode_to_slot.remove(&existing_ino); } // Allocate new. 
- let org_name = self.org_slots[org_idx].org.name().to_owned(); - let ino = self.icache.allocate_inode(); + let org_name = self.composite.slots[org_idx].inner.name().to_owned(); + let ino = self.composite.icache.allocate_inode(); trace!(ino, org_idx, org = %org_name, "ensure_org_inode: allocated new inode"); - let now = std::time::SystemTime::now(); - self.icache.insert_icb( - ino, - InodeControlBlock { - rc: 0, - path: org_name.as_str().into(), - parent: Some(Self::ROOT_NODE_INO), - children: None, - attr: None, - }, - ); + let now = SystemTime::now(); + self.composite + .icache + .insert_icb( + ino, + InodeControlBlock { + rc: 0, + path: org_name.as_str().into(), + parent: Some(Self::ROOT_NODE_INO), + attr: None, + children: None, + }, + ) + .await; - self.org_inodes.insert(ino, org_idx); + self.composite.child_inodes.insert(ino, org_idx); + self.composite.inode_to_slot.insert(ino, org_idx); - // Seed bridge: mesa org-root ↔ OrgFs::ROOT_INO. - self.org_slots[org_idx] + // Reset bridge (may have stale mappings from a previous eviction cycle) + // and seed: mesa org-root <-> OrgFs::ROOT_INO. + self.composite.slots[org_idx].bridge = HashMapBridge::new(); + self.composite.slots[org_idx] .bridge .insert_inode(ino, OrgFs::ROOT_INO); let attr = FileAttr::Directory { - common: self.icache.make_common_file_attr(ino, 0o755, now, now), + common: mescloud_icache::make_common_file_attr( + ino, + 0o755, + now, + now, + self.composite.icache.fs_owner(), + self.composite.icache.block_size(), + ), }; - self.icache.cache_attr(ino, attr); + self.composite.icache.cache_attr(ino, attr).await; (ino, attr) } - - /// Allocate a mesa-level file handle and map it through the bridge. - fn alloc_fh(&mut self, slot_idx: usize, org_fh: FileHandle) -> FileHandle { - let fh = self.icache.allocate_fh(); - self.org_slots[slot_idx].bridge.insert_fh(fh, org_fh); - fh - } - - /// Translate an org inode to a mesa inode, allocating if needed. - /// Also mirrors the ICB into the mesa `inode_table`. - fn translate_org_ino_to_mesa( - &mut self, - slot_idx: usize, - org_ino: Inode, - parent_mesa_ino: Inode, - name: &OsStr, - ) -> Inode { - let mesa_ino = self.org_slots[slot_idx] - .bridge - .backward_or_insert_inode(org_ino, || self.icache.allocate_inode()); - - self.icache - .entry_or_insert_icb(mesa_ino, || InodeControlBlock { - rc: 0, - path: name.into(), - parent: Some(parent_mesa_ino), - children: None, - attr: None, - }); - - mesa_ino - } } #[async_trait::async_trait] @@ -217,80 +260,59 @@ impl Fs for MesaFS { type ReaddirError = ReadDirError; type ReleaseError = ReleaseError; - #[instrument(skip(self))] + #[instrument(name = "MesaFS::lookup", skip(self))] async fn lookup(&mut self, parent: Inode, name: &OsStr) -> Result { - debug_assert!( - self.icache.contains(parent), - "lookup: parent inode {parent} not in inode table" - ); - - match self.inode_role(parent) { + let role = self.inode_role(parent).ok_or(LookupError::InodeNotFound)?; + match role { InodeRole::Root => { - // Children of root are orgs. 
let org_name = name.to_str().ok_or(LookupError::InodeNotFound)?; let org_idx = self - .org_slots + .composite + .slots .iter() - .position(|s| s.org.name() == org_name) + .position(|s| s.inner.name() == org_name) .ok_or(LookupError::InodeNotFound)?; trace!(org = org_name, "lookup: matched org"); - let (ino, attr) = self.ensure_org_inode(org_idx); - let rc = self.icache.inc_rc(ino); + let (ino, attr) = self.ensure_org_inode(org_idx).await; + let rc = self + .composite + .icache + .inc_rc(ino) + .await + .ok_or(LookupError::InodeNotFound)?; trace!(ino, org = org_name, rc, "lookup: resolved org inode"); Ok(attr) } - InodeRole::OrgOwned { idx } => { - // Delegate to org. - let org_parent = self.org_slots[idx] - .bridge - .forward_or_insert_inode(parent, || unreachable!("forward should find parent")); - - let org_attr = self.org_slots[idx].org.lookup(org_parent, name).await?; - let org_ino = org_attr.common().ino; - - let mesa_ino = self.translate_org_ino_to_mesa(idx, org_ino, parent, name); - - let mesa_attr = self.org_slots[idx].bridge.attr_backward(org_attr); - self.icache.cache_attr(mesa_ino, mesa_attr); - let rc = self.icache.inc_rc(mesa_ino); - trace!(mesa_ino, org_ino, rc, "lookup: resolved via org delegation"); - Ok(mesa_attr) - } + InodeRole::OrgOwned => self.composite.delegated_lookup(parent, name).await, } } - #[instrument(skip(self))] + #[instrument(name = "MesaFS::getattr", skip(self))] async fn getattr( &mut self, ino: Inode, _fh: Option, ) -> Result { - self.icache.get_attr(ino).ok_or_else(|| { - warn!(ino, "getattr on unknown inode"); - GetAttrError::InodeNotFound - }) + self.composite.delegated_getattr(ino).await } - #[instrument(skip(self))] + #[instrument(name = "MesaFS::readdir", skip(self))] async fn readdir(&mut self, ino: Inode) -> Result<&[DirEntry], ReadDirError> { - debug_assert!( - self.icache.contains(ino), - "readdir: inode {ino} not in inode table" - ); - - match self.inode_role(ino) { + let role = self.inode_role(ino).ok_or(ReadDirError::InodeNotFound)?; + match role { InodeRole::Root => { let org_info: Vec<(usize, String)> = self - .org_slots + .composite + .slots .iter() .enumerate() - .map(|(idx, s)| (idx, s.org.name().to_owned())) + .map(|(idx, s)| (idx, s.inner.name().to_owned())) .collect(); let mut entries = Vec::with_capacity(org_info.len()); for (org_idx, name) in &org_info { - let (org_ino, _) = self.ensure_org_inode(*org_idx); + let (org_ino, _) = self.ensure_org_inode(*org_idx).await; entries.push(DirEntry { ino: org_ino, name: name.clone().into(), @@ -299,77 +321,19 @@ impl Fs for MesaFS { } trace!(entry_count = entries.len(), "readdir: listing orgs"); - - let icb = self - .icache - .get_icb_mut(ino) - .ok_or(ReadDirError::InodeNotFound)?; - Ok(icb.children.insert(entries)) - } - InodeRole::OrgOwned { idx } => { - let org_ino = self.org_slots[idx] - .bridge - .forward_or_insert_inode(ino, || unreachable!("readdir: ino should be mapped")); - - let org_entries = self.org_slots[idx].org.readdir(org_ino).await?; - let org_entries: Vec = org_entries.to_vec(); - - let mut mesa_entries = Vec::with_capacity(org_entries.len()); - for entry in &org_entries { - let mesa_child_ino = - self.translate_org_ino_to_mesa(idx, entry.ino, ino, &entry.name); - - // Cache attr from org if available. 
- if let Some(org_icb_attr) = - self.org_slots[idx].org.inode_table_get_attr(entry.ino) - { - let mesa_attr = self.org_slots[idx].bridge.attr_backward(org_icb_attr); - self.icache.cache_attr(mesa_child_ino, mesa_attr); - } - - mesa_entries.push(DirEntry { - ino: mesa_child_ino, - name: entry.name.clone(), - kind: entry.kind, - }); - } - - let icb = self - .icache - .get_icb_mut(ino) - .ok_or(ReadDirError::InodeNotFound)?; - Ok(icb.children.insert(mesa_entries)) + self.composite.readdir_buf = entries; + Ok(&self.composite.readdir_buf) } + InodeRole::OrgOwned => self.composite.delegated_readdir(ino).await, } } - #[instrument(skip(self))] + #[instrument(name = "MesaFS::open", skip(self))] async fn open(&mut self, ino: Inode, flags: OpenFlags) -> Result { - let idx = self.org_slot_for_inode(ino).ok_or_else(|| { - warn!(ino, "open on inode not belonging to any org"); - OpenError::InodeNotFound - })?; - - let org_ino = self.org_slots[idx] - .bridge - .forward_or_insert_inode(ino, || unreachable!("open: ino should be mapped")); - - let org_open = self.org_slots[idx].org.open(org_ino, flags).await?; - let mesa_fh = self.alloc_fh(idx, org_open.handle); - - trace!( - ino, - mesa_fh, - org_fh = org_open.handle, - "open: assigned file handle" - ); - Ok(OpenFile { - handle: mesa_fh, - options: org_open.options, - }) + self.composite.delegated_open(ino, flags).await } - #[instrument(skip(self))] + #[instrument(name = "MesaFS::read", skip(self))] async fn read( &mut self, ino: Inode, @@ -379,26 +343,12 @@ impl Fs for MesaFS { flags: OpenFlags, lock_owner: Option, ) -> Result { - let idx = self.org_slot_for_inode(ino).ok_or_else(|| { - warn!(ino, "read on inode not belonging to any org"); - ReadError::InodeNotFound - })?; - - let org_ino = self.org_slots[idx] - .bridge - .forward_or_insert_inode(ino, || unreachable!("read: ino should be mapped")); - let org_fh = self.org_slots[idx].bridge.fh_forward(fh).ok_or_else(|| { - warn!(fh, "read: no fh mapping found"); - ReadError::FileNotOpen - })?; - - self.org_slots[idx] - .org - .read(org_ino, org_fh, offset, size, flags, lock_owner) + self.composite + .delegated_read(ino, fh, offset, size, flags, lock_owner) .await } - #[instrument(skip(self))] + #[instrument(name = "MesaFS::release", skip(self))] async fn release( &mut self, ino: Inode, @@ -406,53 +356,18 @@ impl Fs for MesaFS { flags: OpenFlags, flush: bool, ) -> Result<(), ReleaseError> { - let idx = self.org_slot_for_inode(ino).ok_or_else(|| { - warn!(ino, "release on inode not belonging to any org"); - ReleaseError::FileNotOpen - })?; - - let org_ino = self.org_slots[idx] - .bridge - .forward_or_insert_inode(ino, || unreachable!("release: ino should be mapped")); - let org_fh = self.org_slots[idx].bridge.fh_forward(fh).ok_or_else(|| { - warn!(fh, "release: no fh mapping found"); - ReleaseError::FileNotOpen - })?; - - let result = self.org_slots[idx] - .org - .release(org_ino, org_fh, flags, flush) - .await; - - self.org_slots[idx].bridge.remove_fh_by_left(fh); - trace!(ino, fh, "release: cleaned up fh mapping"); - - result + self.composite + .delegated_release(ino, fh, flags, flush) + .await } - #[instrument(skip(self))] + #[instrument(name = "MesaFS::forget", skip(self))] async fn forget(&mut self, ino: Inode, nlookups: u64) { - debug_assert!( - self.icache.contains(ino), - "forget: inode {ino} not in inode table" - ); - - // Propagate forget to inner org if applicable. 
- if let Some(idx) = self.org_slot_for_inode(ino) - && let Some(&org_ino) = self.org_slots[idx].bridge.inode_map_get_by_left(ino) - { - self.org_slots[idx].org.forget(org_ino, nlookups).await; - } - - if self.icache.forget(ino, nlookups).is_some() { - self.org_inodes.remove(&ino); - for slot in &mut self.org_slots { - slot.bridge.remove_inode_by_left(ino); - } - } + // MesaFS has no extra state to clean up on eviction (unlike OrgFs::owner_inodes). + let _ = self.composite.delegated_forget(ino, nlookups).await; } async fn statfs(&mut self) -> Result { - Ok(self.icache.statfs()) + Ok(self.composite.delegated_statfs()) } } diff --git a/src/fs/mescloud/org.rs b/src/fs/mescloud/org.rs index fcfd522..968c748 100644 --- a/src/fs/mescloud/org.rs +++ b/src/fs/mescloud/org.rs @@ -1,37 +1,79 @@ use std::collections::HashMap; use std::ffi::OsStr; +use std::future::Future; use std::time::SystemTime; use bytes::Bytes; use futures::TryStreamExt as _; use mesa_dev::MesaClient; use secrecy::SecretString; -use tracing::{instrument, trace, warn}; +use tracing::{Instrument as _, instrument, trace, warn}; pub use super::common::{ GetAttrError, LookupError, OpenError, ReadDirError, ReadError, ReleaseError, }; use super::common::{InodeControlBlock, MesaApiError}; +use super::composite::{ChildSlot, CompositeFs}; +use super::icache as mescloud_icache; use super::icache::MescloudICache; use super::repo::RepoFs; use crate::fs::icache::bridge::HashMapBridge; +use crate::fs::icache::{AsyncICache, FileTable, IcbResolver}; use crate::fs::r#trait::{ DirEntry, DirEntryType, FileAttr, FileHandle, FilesystemStats, Fs, Inode, LockOwner, OpenFile, OpenFlags, }; +pub(super) struct OrgResolver { + fs_owner: (u32, u32), + block_size: u32, +} + +impl IcbResolver for OrgResolver { + type Icb = InodeControlBlock; + type Error = LookupError; + + fn resolve( + &self, + ino: Inode, + stub: Option, + _cache: &AsyncICache, + ) -> impl Future> + Send + where + Self: Sized, + { + let fs_owner = self.fs_owner; + let block_size = self.block_size; + async move { + let stub = stub.unwrap_or_else(|| InodeControlBlock { + parent: None, + path: "/".into(), + rc: 0, + attr: None, + children: None, + }); + let now = SystemTime::now(); + let attr = FileAttr::Directory { + common: mescloud_icache::make_common_file_attr( + ino, 0o755, now, now, fs_owner, block_size, + ), + }; + Ok(InodeControlBlock { + attr: Some(attr), + children: Some(vec![]), + ..stub + }) + } + .instrument(tracing::info_span!("OrgResolver::resolve", ino)) + } +} + #[derive(Debug, Clone)] pub struct OrgConfig { pub name: String, pub api_key: SecretString, } -/// Per-repo wrapper with inode and file handle translation. -struct RepoSlot { - repo: RepoFs, - bridge: HashMapBridge, // left = org, right = repo -} - /// Classifies an inode by its role in the org hierarchy. enum InodeRole { /// The org root directory. @@ -39,25 +81,19 @@ enum InodeRole { /// A virtual owner directory (github only). OwnerDir, /// An inode owned by some repo. - RepoOwned { idx: usize }, + RepoOwned, } /// A filesystem rooted at a single organization. /// -/// Owns multiple [`RepoFs`] instances and translates inodes between its namespace -/// and each repo's namespace using [`HashMapBridge`]. +/// Composes multiple [`RepoFs`] instances, each with its own inode namespace, +/// delegating to [`CompositeFs`] for inode/fh translation at each boundary. pub struct OrgFs { name: String, client: MesaClient, - - icache: MescloudICache, - - /// Maps org-level repo-root inodes → index into `repos`. 
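For orientation while reading `OrgFs`, the github special case (TODO(MES-674)) adds one virtual directory level compared to a normal org. The layout below is an example: `acme` and `my-repo` are made-up names, while `torvalds/linux` mirrors the names used in the comments in this diff.

```text
MesaFS root
├─ acme/          (OrgFs: children are repos)
│  └─ my-repo/    (RepoFs root)
└─ github/        (OrgFs where is_github() == true)
   └─ torvalds/   (virtual owner dir: lookup-only, readdir returns NotPermitted)
      └─ linux/   (RepoFs root; API calls use base64("torvalds/linux"))
```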
- repo_inodes: HashMap, - /// Maps org-level owner-dir inodes → owner name. - /// Only populated when org name is "github". + composite: CompositeFs, + /// Maps org-level owner-dir inodes to owner name (github only). owner_inodes: HashMap, - repos: Vec, } impl OrgFs { @@ -65,6 +101,7 @@ impl OrgFs { const BLOCK_SIZE: u32 = 4096; /// The name of the organization. + #[must_use] pub(crate) fn name(&self) -> &str { &self.name } @@ -75,18 +112,6 @@ impl OrgFs { self.name == "github" } - /// Decode a base64-encoded repo name from the API. Returns "owner/repo". - /// TODO(MES-674): Cleanup "special" casing for github. - #[expect(dead_code)] - fn decode_github_repo_name(encoded: &str) -> Option { - use base64::Engine as _; - let bytes = base64::engine::general_purpose::STANDARD - .decode(encoded) - .ok()?; - let decoded = String::from_utf8(bytes).ok()?; - decoded.contains('/').then_some(decoded) - } - /// Encode "owner/repo" to base64 for API calls. /// TODO(MES-674): Cleanup "special" casing for github. fn encode_github_repo_name(decoded: &str) -> String { @@ -96,101 +121,105 @@ impl OrgFs { /// Ensure an inode exists for a virtual owner directory (github only). Does NOT bump rc. /// TODO(MES-674): Cleanup "special" casing for github. - fn ensure_owner_inode(&mut self, owner: &str) -> (Inode, FileAttr) { + async fn ensure_owner_inode(&mut self, owner: &str) -> (Inode, FileAttr) { // Check existing + let mut stale_ino = None; for (&ino, existing_owner) in &self.owner_inodes { if existing_owner == owner { - if let Some(attr) = self.icache.get_attr(ino) { + if let Some(attr) = self.composite.icache.get_attr(ino).await { + return (ino, attr); + } + if self.composite.icache.contains(ino) { + // ICB exists but attr missing — rebuild and cache + let now = SystemTime::now(); + let attr = FileAttr::Directory { + common: mescloud_icache::make_common_file_attr( + ino, + 0o755, + now, + now, + self.composite.icache.fs_owner(), + self.composite.icache.block_size(), + ), + }; + self.composite.icache.cache_attr(ino, attr).await; return (ino, attr); } - let now = SystemTime::now(); - let attr = FileAttr::Directory { - common: self.icache.make_common_file_attr(ino, 0o755, now, now), - }; - self.icache.cache_attr(ino, attr); - return (ino, attr); + // ICB was evicted — mark for cleanup + stale_ino = Some(ino); + break; } } + if let Some(ino) = stale_ino { + self.owner_inodes.remove(&ino); + } // Allocate new - let ino = self.icache.allocate_inode(); + let ino = self.composite.icache.allocate_inode(); let now = SystemTime::now(); - self.icache.insert_icb( - ino, - InodeControlBlock { - rc: 0, - path: owner.into(), - parent: Some(Self::ROOT_INO), - children: None, - attr: None, - }, - ); + self.composite + .icache + .insert_icb( + ino, + InodeControlBlock { + rc: 0, + path: owner.into(), + parent: Some(Self::ROOT_INO), + attr: None, + children: None, + }, + ) + .await; self.owner_inodes.insert(ino, owner.to_owned()); let attr = FileAttr::Directory { - common: self.icache.make_common_file_attr(ino, 0o755, now, now), + common: mescloud_icache::make_common_file_attr( + ino, + 0o755, + now, + now, + self.composite.icache.fs_owner(), + self.composite.icache.block_size(), + ), }; - self.icache.cache_attr(ino, attr); + self.composite.icache.cache_attr(ino, attr).await; (ino, attr) } - /// Get the cached attr for an inode, if present. 
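`encode_github_repo_name` turns the "owner/repo" pair into a slash-free identifier for API calls by base64-encoding it, mirroring the removed decode helper. A quick standalone check using the `base64` crate; the values are examples:

```rust
use base64::Engine as _;
use base64::engine::general_purpose::STANDARD;

fn encode_github_repo_name(decoded: &str) -> String {
    // "owner/repo" -> opaque, slash-free identifier for the API path.
    STANDARD.encode(decoded)
}

fn main() {
    let encoded = encode_github_repo_name("torvalds/linux");
    assert_eq!(encoded, "dG9ydmFsZHMvbGludXg=");

    // Decoding reverses it, as the (removed) decode helper did.
    let bytes = STANDARD.decode(&encoded).unwrap();
    assert_eq!(String::from_utf8(bytes).unwrap(), "torvalds/linux");
}
```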
- pub(crate) fn inode_table_get_attr(&self, ino: Inode) -> Option { - self.icache.get_attr(ino) - } - + #[must_use] pub fn new(name: String, client: MesaClient, fs_owner: (u32, u32)) -> Self { + let resolver = OrgResolver { + fs_owner, + block_size: Self::BLOCK_SIZE, + }; Self { name, client, - icache: MescloudICache::new(Self::ROOT_INO, fs_owner, Self::BLOCK_SIZE), - repo_inodes: HashMap::new(), + composite: CompositeFs { + icache: MescloudICache::new(resolver, Self::ROOT_INO, fs_owner, Self::BLOCK_SIZE), + file_table: FileTable::new(), + readdir_buf: Vec::new(), + child_inodes: HashMap::new(), + inode_to_slot: HashMap::new(), + slots: Vec::new(), + }, owner_inodes: HashMap::new(), - repos: Vec::new(), } } /// Classify an inode by its role. - fn inode_role(&self, ino: Inode) -> InodeRole { + fn inode_role(&self, ino: Inode) -> Option { if ino == Self::ROOT_INO { - return InodeRole::OrgRoot; + return Some(InodeRole::OrgRoot); } if self.owner_inodes.contains_key(&ino) { - return InodeRole::OwnerDir; - } - if let Some(&idx) = self.repo_inodes.get(&ino) { - return InodeRole::RepoOwned { idx }; - } - // Walk parent chain to find owning repo. - if let Some(idx) = self.repo_slot_for_inode(ino) { - return InodeRole::RepoOwned { idx }; + return Some(InodeRole::OwnerDir); } - // Shouldn't happen — all non-root inodes should be repo-owned. - trace!( - ino, - "inode_role: inode not found in any repo slot, falling back to OrgRoot" - ); - debug_assert!(false, "inode {ino} not found in any repo slot"); - InodeRole::OrgRoot - } - - /// Find the repo slot index that owns `ino` by walking the parent chain. - fn repo_slot_for_inode(&self, ino: Inode) -> Option { - // Direct repo root? - if let Some(&idx) = self.repo_inodes.get(&ino) { - return Some(idx); + if self.composite.child_inodes.contains_key(&ino) { + return Some(InodeRole::RepoOwned); } - // Walk parents. - let mut current = ino; - while let Some(parent) = self.icache.get_icb(current).and_then(|icb| icb.parent) { - if let Some(&idx) = self.repo_inodes.get(&parent) { - return Some(idx); - } - current = parent; + if self.composite.slot_for_inode(ino).is_some() { + return Some(InodeRole::RepoOwned); } - trace!( - ino, - "repo_slot_for_inode: exhausted parent chain without finding repo" - ); None } @@ -200,7 +229,7 @@ impl OrgFs { /// - `repo_name`: name used for API calls / `RepoFs` (base64-encoded for github) /// - `display_name`: name shown in filesystem ("linux" for github, same as `repo_name` otherwise) /// - `parent_ino`: owner-dir inode for github, `ROOT_INO` otherwise - fn ensure_repo_inode( + async fn ensure_repo_inode( &mut self, repo_name: &str, display_name: &str, @@ -208,73 +237,136 @@ impl OrgFs { parent_ino: Inode, ) -> (Inode, FileAttr) { // Check existing repos. - for (&ino, &idx) in &self.repo_inodes { - if self.repos[idx].repo.repo_name() == repo_name { - if let Some(icb) = self.icache.get_icb(ino) - && let Some(attr) = icb.attr - { - trace!( - ino, - repo = repo_name, - rc = icb.rc, - "ensure_repo_inode: reusing" - ); + for (&ino, &idx) in &self.composite.child_inodes { + if self.composite.slots[idx].inner.repo_name() == repo_name { + if let Some(attr) = self.composite.icache.get_attr(ino).await { + let rc = self + .composite + .icache + .get_icb(ino, |icb| icb.rc) + .await + .unwrap_or(0); + trace!(ino, repo = repo_name, rc, "ensure_repo_inode: reusing"); return (ino, attr); } - // Attr missing — rebuild. 
warn!( ino, repo = repo_name, "ensure_repo_inode: attr missing, rebuilding" ); - let now = SystemTime::now(); - let attr = FileAttr::Directory { - common: self.icache.make_common_file_attr(ino, 0o755, now, now), - }; - self.icache.cache_attr(ino, attr); - return (ino, attr); + return self.make_repo_dir_attr(ino).await; } } - // Allocate new. - let ino = self.icache.allocate_inode(); + // Check for orphaned slot (slot exists but not in child_inodes). + if let Some(idx) = self + .composite + .slots + .iter() + .position(|s| s.inner.repo_name() == repo_name) + { + return self.register_repo_slot(idx, display_name, parent_ino).await; + } + + // Allocate truly new slot. + let ino = self.composite.icache.allocate_inode(); trace!( ino, repo = repo_name, "ensure_repo_inode: allocated new inode" ); - let now = SystemTime::now(); - self.icache.insert_icb( - ino, - InodeControlBlock { - rc: 0, - path: display_name.into(), - parent: Some(parent_ino), - children: None, - attr: None, - }, - ); + self.composite + .icache + .insert_icb( + ino, + InodeControlBlock { + rc: 0, + path: display_name.into(), + parent: Some(parent_ino), + attr: None, + children: None, + }, + ) + .await; let repo = RepoFs::new( self.client.clone(), self.name.clone(), repo_name.to_owned(), default_branch.to_owned(), - self.icache.fs_owner(), + self.composite.icache.fs_owner(), ); let mut bridge = HashMapBridge::new(); bridge.insert_inode(ino, RepoFs::ROOT_INO); - let idx = self.repos.len(); - self.repos.push(RepoSlot { repo, bridge }); - self.repo_inodes.insert(ino, idx); + let idx = self.composite.slots.len(); + self.composite.slots.push(ChildSlot { + inner: repo, + bridge, + }); + self.composite.child_inodes.insert(ino, idx); + self.composite.inode_to_slot.insert(ino, idx); + + self.make_repo_dir_attr(ino).await + } + + /// Allocate a new inode, register it in an existing (orphaned) slot, and + /// return `(ino, attr)`. + async fn register_repo_slot( + &mut self, + idx: usize, + display_name: &str, + parent_ino: Inode, + ) -> (Inode, FileAttr) { + let ino = self.composite.icache.allocate_inode(); + trace!(ino, idx, "register_repo_slot: reusing orphaned slot"); + + self.composite + .icache + .insert_icb( + ino, + InodeControlBlock { + rc: 0, + path: display_name.into(), + parent: Some(parent_ino), + attr: None, + children: None, + }, + ) + .await; + warn!( + ino, + idx, + "register_repo_slot: resetting bridge for orphaned slot; \ + inner filesystem will not receive forget for stale inode mappings" + ); + self.composite.slots[idx].bridge = HashMapBridge::new(); + self.composite.slots[idx] + .bridge + .insert_inode(ino, RepoFs::ROOT_INO); + self.composite.child_inodes.insert(ino, idx); + self.composite.inode_to_slot.insert(ino, idx); + + self.make_repo_dir_attr(ino).await + } + + /// Build and cache a directory attr for `ino`, returning `(ino, attr)`. + async fn make_repo_dir_attr(&self, ino: Inode) -> (Inode, FileAttr) { + let now = SystemTime::now(); let attr = FileAttr::Directory { - common: self.icache.make_common_file_attr(ino, 0o755, now, now), + common: mescloud_icache::make_common_file_attr( + ino, + 0o755, + now, + now, + self.composite.icache.fs_owner(), + self.composite.icache.block_size(), + ), }; - self.icache.cache_attr(ino, attr); + self.composite.icache.cache_attr(ino, attr).await; (ino, attr) } @@ -291,52 +383,12 @@ impl OrgFs { .await .map_err(MesaApiError::from) } +} - /// Allocate an org-level file handle and map it through the bridge. 
- fn alloc_fh(&mut self, slot_idx: usize, repo_fh: FileHandle) -> FileHandle { - let fh = self.icache.allocate_fh(); - self.repos[slot_idx].bridge.insert_fh(fh, repo_fh); - fh - } - - /// Translate a repo inode to an org inode, allocating if needed. - /// Also mirrors the ICB into the org's `inode_table`. - fn translate_repo_ino_to_org( - &mut self, - slot_idx: usize, - repo_ino: Inode, - parent_org_ino: Inode, - name: &OsStr, - _kind: DirEntryType, - ) -> Inode { - let org_ino = self.repos[slot_idx] - .bridge - .backward_or_insert_inode(repo_ino, || self.icache.allocate_inode()); - - // Ensure there's an ICB in the org table. - let icb = self.icache.entry_or_insert_icb(org_ino, || { - trace!( - org_ino, - repo_ino, - parent = parent_org_ino, - ?name, - "translate: created new org ICB" - ); - InodeControlBlock { - rc: 0, - path: name.into(), - parent: Some(parent_org_ino), - children: None, - attr: None, - } - }); - - // Log reuse case. - if icb.rc > 0 || icb.attr.is_some() { - trace!(org_ino, repo_ino, "translate: reused existing org ICB"); - } - - org_ino +#[async_trait::async_trait] +impl super::common::InodeCachePeek for OrgFs { + async fn peek_attr(&self, ino: Inode) -> Option { + self.composite.icache.get_attr(ino).await } } @@ -349,14 +401,10 @@ impl Fs for OrgFs { type ReaddirError = ReadDirError; type ReleaseError = ReleaseError; - #[instrument(skip(self), fields(org = %self.name))] + #[instrument(name = "OrgFs::lookup", skip(self), fields(org = %self.name))] async fn lookup(&mut self, parent: Inode, name: &OsStr) -> Result { - debug_assert!( - self.icache.contains(parent), - "lookup: parent inode {parent} not in inode table" - ); - - match self.inode_role(parent) { + let role = self.inode_role(parent).ok_or(LookupError::InodeNotFound)?; + match role { InodeRole::OrgRoot => { // TODO(MES-674): Cleanup "special" casing for github. let name_str = name.to_str().ok_or(LookupError::InodeNotFound)?; @@ -364,8 +412,12 @@ impl Fs for OrgFs { if self.is_github() { // name is an owner like "torvalds" — create lazily, no API validation. trace!(owner = name_str, "lookup: resolving github owner dir"); - let (ino, attr) = self.ensure_owner_inode(name_str); - self.icache.inc_rc(ino); + let (ino, attr) = self.ensure_owner_inode(name_str).await; + self.composite + .icache + .inc_rc(ino) + .await + .ok_or(LookupError::InodeNotFound)?; Ok(attr) } else { // Children of org root are repos. @@ -374,13 +426,15 @@ impl Fs for OrgFs { // Validate repo exists via API. let repo = self.wait_for_sync(name_str).await?; - let (ino, attr) = self.ensure_repo_inode( - name_str, - name_str, - &repo.default_branch, - Self::ROOT_INO, - ); - let rc = self.icache.inc_rc(ino); + let (ino, attr) = self + .ensure_repo_inode(name_str, name_str, &repo.default_branch, Self::ROOT_INO) + .await; + let rc = self + .composite + .icache + .inc_rc(ino) + .await + .ok_or(LookupError::InodeNotFound)?; trace!(ino, repo = name_str, rc, "lookup: resolved repo inode"); Ok(attr) } @@ -407,56 +461,33 @@ impl Fs for OrgFs { // Validate via API (uses encoded name). let repo = self.wait_for_sync(&encoded).await?; - let (ino, attr) = - self.ensure_repo_inode(&encoded, repo_name_str, &repo.default_branch, parent); - self.icache.inc_rc(ino); + let (ino, attr) = self + .ensure_repo_inode(&encoded, repo_name_str, &repo.default_branch, parent) + .await; + self.composite + .icache + .inc_rc(ino) + .await + .ok_or(LookupError::InodeNotFound)?; Ok(attr) } - InodeRole::RepoOwned { idx } => { - // Delegate to repo. 
- let repo_parent = self.repos[idx] - .bridge - .forward_or_insert_inode(parent, || unreachable!("forward should find parent")); - // ^ forward should always find parent since it was previously mapped. - // Using forward_or_insert just for safety, but the allocate closure should never run. - - let repo_attr = self.repos[idx].repo.lookup(repo_parent, name).await?; - let repo_ino = repo_attr.common().ino; - - // Translate back to org namespace. - let kind: DirEntryType = repo_attr.into(); - let org_ino = self.translate_repo_ino_to_org(idx, repo_ino, parent, name, kind); - - // Rebuild attr with org inode. - let org_attr = self.repos[idx].bridge.attr_backward(repo_attr); - self.icache.cache_attr(org_ino, org_attr); - let rc = self.icache.inc_rc(org_ino); - trace!(org_ino, repo_ino, rc, "lookup: resolved content inode"); - Ok(org_attr) - } + InodeRole::RepoOwned => self.composite.delegated_lookup(parent, name).await, } } - #[instrument(skip(self), fields(org = %self.name))] + #[instrument(name = "OrgFs::getattr", skip(self), fields(org = %self.name))] async fn getattr( &mut self, ino: Inode, _fh: Option, ) -> Result { - self.icache.get_attr(ino).ok_or_else(|| { - warn!(ino, "getattr on unknown inode"); - GetAttrError::InodeNotFound - }) + self.composite.delegated_getattr(ino).await } - #[instrument(skip(self), fields(org = %self.name))] + #[instrument(name = "OrgFs::readdir", skip(self), fields(org = %self.name))] async fn readdir(&mut self, ino: Inode) -> Result<&[DirEntry], ReadDirError> { - debug_assert!( - self.icache.contains(ino), - "readdir: inode {ino} not in inode table" - ); - - match self.inode_role(ino) { + let role = self.inode_role(ino).ok_or(ReadDirError::InodeNotFound)?; + match role { InodeRole::OrgRoot => { // TODO(MES-674): Cleanup "special" casing for github. if self.is_github() { @@ -485,12 +516,9 @@ impl Fs for OrgFs { let mut entries = Vec::with_capacity(repo_infos.len()); for (repo_name, default_branch) in &repo_infos { - let (repo_ino, _) = self.ensure_repo_inode( - repo_name, - repo_name, - default_branch, - Self::ROOT_INO, - ); + let (repo_ino, _) = self + .ensure_repo_inode(repo_name, repo_name, default_branch, Self::ROOT_INO) + .await; entries.push(DirEntry { ino: repo_ino, name: repo_name.clone().into(), @@ -498,96 +526,24 @@ impl Fs for OrgFs { }); } - let icb = self - .icache - .get_icb_mut(ino) - .ok_or(ReadDirError::InodeNotFound)?; - Ok(icb.children.insert(entries)) + self.composite.readdir_buf = entries; + Ok(&self.composite.readdir_buf) } InodeRole::OwnerDir if self.is_github() => { // TODO(MES-674): Cleanup "special" casing for github. - return Err(ReadDirError::NotPermitted); - } - InodeRole::OwnerDir => { - return Err(ReadDirError::NotADirectory); - } - InodeRole::RepoOwned { idx } => { - // Delegate to repo. - let repo_ino = self.repos[idx] - .bridge - .forward_or_insert_inode(ino, || unreachable!("readdir: ino should be mapped")); - - let repo_entries = self.repos[idx].repo.readdir(repo_ino).await?; - // Clone entries to release borrow on repo before mutating self. - let repo_entries: Vec = repo_entries.to_vec(); - - let mut org_entries = Vec::with_capacity(repo_entries.len()); - for entry in &repo_entries { - let org_child_ino = self.translate_repo_ino_to_org( - idx, - entry.ino, - ino, - &entry.name, - entry.kind, - ); - - // Cache attr from repo if available. 
- if let Some(repo_icb_attr) = - self.repos[idx].repo.inode_table_get_attr(entry.ino) - { - let org_attr = self.repos[idx].bridge.attr_backward(repo_icb_attr); - self.icache.cache_attr(org_child_ino, org_attr); - } else { - trace!( - repo_ino = entry.ino, - org_ino = org_child_ino, - "readdir: no cached attr from repo to propagate" - ); - } - - org_entries.push(DirEntry { - ino: org_child_ino, - name: entry.name.clone(), - kind: entry.kind, - }); - } - - let icb = self - .icache - .get_icb_mut(ino) - .ok_or(ReadDirError::InodeNotFound)?; - Ok(icb.children.insert(org_entries)) + Err(ReadDirError::NotPermitted) } + InodeRole::OwnerDir => Err(ReadDirError::NotADirectory), + InodeRole::RepoOwned => self.composite.delegated_readdir(ino).await, } } - #[instrument(skip(self), fields(org = %self.name))] + #[instrument(name = "OrgFs::open", skip(self), fields(org = %self.name))] async fn open(&mut self, ino: Inode, flags: OpenFlags) -> Result { - let idx = self.repo_slot_for_inode(ino).ok_or_else(|| { - warn!(ino, "open on inode not belonging to any repo"); - OpenError::InodeNotFound - })?; - - let repo_ino = self.repos[idx] - .bridge - .forward_or_insert_inode(ino, || unreachable!("open: ino should be mapped")); - - let repo_open = self.repos[idx].repo.open(repo_ino, flags).await?; - let org_fh = self.alloc_fh(idx, repo_open.handle); - - trace!( - ino, - org_fh, - repo_fh = repo_open.handle, - "open: assigned file handle" - ); - Ok(OpenFile { - handle: org_fh, - options: repo_open.options, - }) + self.composite.delegated_open(ino, flags).await } - #[instrument(skip(self), fields(org = %self.name))] + #[instrument(name = "OrgFs::read", skip(self), fields(org = %self.name))] async fn read( &mut self, ino: Inode, @@ -597,30 +553,12 @@ impl Fs for OrgFs { flags: OpenFlags, lock_owner: Option, ) -> Result { - let idx = self.repo_slot_for_inode(ino).ok_or_else(|| { - warn!(ino, "read on inode not belonging to any repo"); - ReadError::InodeNotFound - })?; - - let repo_ino = self.repos[idx] - .bridge - .forward_or_insert_inode(ino, || unreachable!("read: ino should be mapped")); - let repo_fh = self.repos[idx].bridge.fh_forward(fh).ok_or_else(|| { - warn!(fh, "read: no fh mapping found"); - ReadError::FileNotOpen - })?; - - trace!( - ino, - fh, repo_ino, repo_fh, offset, size, "read: delegating to repo" - ); - self.repos[idx] - .repo - .read(repo_ino, repo_fh, offset, size, flags, lock_owner) + self.composite + .delegated_read(ino, fh, offset, size, flags, lock_owner) .await } - #[instrument(skip(self), fields(org = %self.name))] + #[instrument(name = "OrgFs::release", skip(self), fields(org = %self.name))] async fn release( &mut self, ino: Inode, @@ -628,63 +566,20 @@ impl Fs for OrgFs { flags: OpenFlags, flush: bool, ) -> Result<(), ReleaseError> { - let idx = self.repo_slot_for_inode(ino).ok_or_else(|| { - warn!(ino, "release on inode not belonging to any repo"); - ReleaseError::FileNotOpen - })?; - - let repo_ino = self.repos[idx] - .bridge - .forward_or_insert_inode(ino, || unreachable!("release: ino should be mapped")); - let repo_fh = self.repos[idx].bridge.fh_forward(fh).ok_or_else(|| { - warn!(fh, "release: no fh mapping found"); - ReleaseError::FileNotOpen - })?; - - trace!(ino, fh, repo_ino, repo_fh, "release: delegating to repo"); - let result = self.repos[idx] - .repo - .release(repo_ino, repo_fh, flags, flush) - .await; - - // Clean up fh mapping. 
- self.repos[idx].bridge.remove_fh_by_left(fh); - trace!(ino, fh, "release: cleaned up fh mapping"); - - result + self.composite + .delegated_release(ino, fh, flags, flush) + .await } - #[instrument(skip(self), fields(org = %self.name))] + #[instrument(name = "OrgFs::forget", skip(self), fields(org = %self.name))] async fn forget(&mut self, ino: Inode, nlookups: u64) { - debug_assert!( - self.icache.contains(ino), - "forget: inode {ino} not in inode table" - ); - - // Propagate forget to inner repo if applicable. - if let Some(idx) = self.repo_slot_for_inode(ino) { - if let Some(&repo_ino) = self.repos[idx].bridge.inode_map_get_by_left(ino) { - self.repos[idx].repo.forget(repo_ino, nlookups).await; - } else { - trace!( - ino, - "forget: no bridge mapping found, skipping repo propagation" - ); - } - } - - if self.icache.forget(ino, nlookups).is_some() { - // Clean up repo_inodes and owner_inodes mappings. - self.repo_inodes.remove(&ino); + let evicted = self.composite.delegated_forget(ino, nlookups).await; + if evicted { self.owner_inodes.remove(&ino); - // Clean up bridge mapping — find which slot, remove. - for slot in &mut self.repos { - slot.bridge.remove_inode_by_left(ino); - } } } async fn statfs(&mut self) -> Result { - Ok(self.icache.statfs()) + Ok(self.composite.delegated_statfs()) } } diff --git a/src/fs/mescloud/repo.rs b/src/fs/mescloud/repo.rs index 94f7ee8..0d22196 100644 --- a/src/fs/mescloud/repo.rs +++ b/src/fs/mescloud/repo.rs @@ -2,6 +2,7 @@ //! //! This module directly accesses the mesa repo through the Rust SDK, on a per-repo basis. +use std::future::Future; use std::{collections::HashMap, ffi::OsStr, path::PathBuf, time::SystemTime}; use base64::Engine as _; @@ -9,8 +10,9 @@ use bytes::Bytes; use mesa_dev::MesaClient; use mesa_dev::low_level::content::{Content, DirEntry as MesaDirEntry}; use num_traits::cast::ToPrimitive as _; -use tracing::{instrument, trace, warn}; +use tracing::{Instrument as _, instrument, trace, warn}; +use crate::fs::icache::{AsyncICache, FileTable, IcbResolver}; use crate::fs::r#trait::{ DirEntry, DirEntryType, FileAttr, FileHandle, FileOpenOptions, FilesystemStats, Fs, Inode, LockOwner, OpenFile, OpenFlags, @@ -21,7 +23,151 @@ pub use super::common::{ GetAttrError, LookupError, OpenError, ReadDirError, ReadError, ReleaseError, }; use super::icache as mescloud_icache; -use super::icache::MescloudICache; +use super::icache::{InodeControlBlock, MescloudICache}; + +pub(super) struct RepoResolver { + client: MesaClient, + org_name: String, + repo_name: String, + ref_: String, + fs_owner: (u32, u32), + block_size: u32, +} + +impl IcbResolver for RepoResolver { + type Icb = InodeControlBlock; + type Error = LookupError; + + fn resolve( + &self, + ino: Inode, + stub: Option, + cache: &AsyncICache, + ) -> impl Future> + Send + where + Self: Sized, + { + let client = self.client.clone(); + let org_name = self.org_name.clone(); + let repo_name = self.repo_name.clone(); + let ref_ = self.ref_.clone(); + let fs_owner = self.fs_owner; + let block_size = self.block_size; + + async move { + let stub = stub.ok_or(LookupError::InodeNotFound)?; + let file_path = build_repo_path(stub.parent, &stub.path, cache, RepoFs::ROOT_INO).await; + + // Non-root inodes must have a resolvable path. 
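+ // A `None` path with no parent is the repo root: the content API receives `path=None` and serves the top-level listing.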
+ if stub.parent.is_some() && file_path.is_none() { + return Err(LookupError::InodeNotFound); + } + + let content = client + .org(&org_name) + .repos() + .at(&repo_name) + .content() + .get(Some(ref_.as_str()), file_path.as_deref(), Some(1u64)) + .await + .map_err(MesaApiError::from)?; + + let now = SystemTime::now(); + let attr = match &content { + Content::File(f) => { + let size = f.size.to_u64().unwrap_or(0); + FileAttr::RegularFile { + common: mescloud_icache::make_common_file_attr( + ino, 0o644, now, now, fs_owner, block_size, + ), + size, + blocks: mescloud_icache::blocks_of_size(block_size, size), + } + } + Content::Symlink(s) => { + let size = s.size.to_u64().unwrap_or(0); + FileAttr::RegularFile { + common: mescloud_icache::make_common_file_attr( + ino, 0o644, now, now, fs_owner, block_size, + ), + size, + blocks: mescloud_icache::blocks_of_size(block_size, size), + } + } + Content::Dir(_) => FileAttr::Directory { + common: mescloud_icache::make_common_file_attr( + ino, 0o755, now, now, fs_owner, block_size, + ), + }, + }; + + let children = match content { + Content::Dir(d) => Some( + d.entries + .into_iter() + .filter_map(|e| { + let (name, kind) = match e { + MesaDirEntry::File(f) => (f.name?, DirEntryType::RegularFile), + // TODO(MES-712): return DirEntryType::Symlink once readlink is wired up. + MesaDirEntry::Symlink(s) => (s.name?, DirEntryType::RegularFile), + MesaDirEntry::Dir(d) => (d.name?, DirEntryType::Directory), + }; + Some((name, kind)) + }) + .collect(), + ), + Content::File(_) | Content::Symlink(_) => None, + }; + + Ok(InodeControlBlock { + parent: stub.parent, + path: stub.path, + rc: stub.rc, + attr: Some(attr), + children, + }) + } + .instrument(tracing::info_span!("RepoResolver::resolve", ino)) + } +} + +/// Walk the parent chain in the cache to build the repo-relative path. +/// Returns `None` for the root inode (maps to `path=None` in the mesa content API). +async fn build_repo_path( + parent: Option, + name: &std::path::Path, + cache: &AsyncICache, + root_ino: Inode, +) -> Option { + /// Maximum parent-chain depth before bailing out. Prevents infinite loops + /// if a bug creates a cycle in the parent pointers. + const MAX_DEPTH: usize = 1024; + + let parent = parent?; + if parent == root_ino { + return name.to_str().map(String::from); + } + + let mut components = vec![name.to_path_buf()]; + let mut current = parent; + for _ in 0..MAX_DEPTH { + if current == root_ino { + break; + } + let (path, next_parent) = cache + .get_icb(current, |icb| (icb.path.clone(), icb.parent)) + .await?; + components.push(path); + current = next_parent?; + } + if current != root_ino { + tracing::warn!("build_repo_path: exceeded MAX_DEPTH={MAX_DEPTH}, possible parent cycle"); + return None; + } + components.reverse(); + let joined: PathBuf = components.iter().collect(); + joined.to_str().map(String::from) +} /// A filesystem rooted at a single mesa repository. 
/// @@ -33,7 +179,9 @@ pub struct RepoFs { repo_name: String, ref_: String, - icache: MescloudICache, + icache: MescloudICache<RepoResolver>, + file_table: FileTable, + readdir_buf: Vec<DirEntry>, open_files: HashMap<FileHandle, Inode>, } @@ -49,12 +197,22 @@ impl RepoFs { ref_: String, fs_owner: (u32, u32), ) -> Self { + let resolver = RepoResolver { + client: client.clone(), + org_name: org_name.clone(), + repo_name: repo_name.clone(), + ref_: ref_.clone(), + fs_owner, + block_size: Self::BLOCK_SIZE, + }; Self { client, org_name, repo_name, ref_, - icache: MescloudICache::new(Self::ROOT_INO, fs_owner, Self::BLOCK_SIZE), + icache: MescloudICache::new(resolver, Self::ROOT_INO, fs_owner, Self::BLOCK_SIZE), + file_table: FileTable::new(), + readdir_buf: Vec::new(), open_files: HashMap::new(), } } @@ -64,42 +222,48 @@ impl RepoFs { &self.repo_name } - /// Get the cached attr for an inode, if present. - pub(crate) fn inode_table_get_attr(&self, ino: Inode) -> Option<FileAttr> { - self.icache.get_attr(ino) - } - /// Build the repo-relative path for an inode by walking up the parent chain. /// /// Returns `None` for the root inode (the repo top-level maps to `path=None` in the /// mesa content API). - fn path_of_inode(&self, ino: Inode) -> Option<String> { + async fn path_of_inode(&self, ino: Inode) -> Option<String> { + /// Maximum parent-chain depth before bailing out. + const MAX_DEPTH: usize = 1024; + if ino == Self::ROOT_INO { return None; } let mut components = Vec::new(); let mut current = ino; - while current != Self::ROOT_INO { - let icb = self.icache.get_icb(current)?; - components.push(icb.path.clone()); - current = icb.parent?; + for _ in 0..MAX_DEPTH { + if current == Self::ROOT_INO { + break; + } + let (path, parent) = self + .icache + .get_icb(current, |icb| (icb.path.clone(), icb.parent)) + .await?; + components.push(path); + current = parent?; + } + if current != Self::ROOT_INO { + tracing::warn!( + ino, + "path_of_inode: exceeded MAX_DEPTH={MAX_DEPTH}, possible parent cycle" + ); + return None; } components.reverse(); let joined: PathBuf = components.iter().collect(); joined.to_str().map(String::from) } +} - /// Build the repo-relative path for a child of `parent`. - fn path_of_child(&self, parent: Inode, name: &OsStr) -> Option<String> { - if parent == Self::ROOT_INO { - return name.to_str().map(String::from); - } - self.path_of_inode(parent).and_then(|p| { - let mut pb = PathBuf::from(p); - pb.push(name); - pb.to_str().map(String::from) - }) +#[async_trait::async_trait] +impl super::common::InodeCachePeek for RepoFs { + async fn peek_attr(&self, ino: Inode) -> Option<FileAttr> { + self.icache.get_attr(ino).await } } @@ -112,81 +276,42 @@ impl Fs for RepoFs { type ReaddirError = ReadDirError; type ReleaseError = ReleaseError; - #[instrument(skip(self), fields(repo = %self.repo_name))] + #[instrument(name = "RepoFs::lookup", skip(self), fields(repo = %self.repo_name))] async fn lookup(&mut self, parent: Inode, name: &OsStr) -> Result<FileAttr, LookupError> { debug_assert!( self.icache.contains(parent), "lookup: parent inode {parent} not in inode table" ); - let file_path = self.path_of_child(parent, name); + let ino = self.icache.ensure_child_ino(parent, name).await; + let attr = self + .icache + .get_or_resolve(ino, |icb| icb.attr) + .await?
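+ // `get_or_resolve` invokes the resolver when the entry has not been resolved yet; an entry that still lacks an attr afterwards maps to `InodeNotFound`.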
+ .ok_or(LookupError::InodeNotFound)?; - let content = self - .client - .org(&self.org_name) - .repos() - .at(&self.repo_name) - .content() - .get(Some(self.ref_.as_str()), file_path.as_deref(), None) + let rc = self + .icache + .inc_rc(ino) .await - .map_err(MesaApiError::from)?; - - #[expect( - clippy::match_same_arms, - reason = "symlink arm will diverge once readlink is wired up" - )] - let kind = match &content { - Content::File(_) => DirEntryType::RegularFile, - // TODO(MES-712): return DirEntryType::Symlink and FileAttr::Symlink, then wire up readlink. - Content::Symlink(_) => DirEntryType::RegularFile, - Content::Dir(_) => DirEntryType::Directory, - }; - - let (ino, _) = self.icache.ensure_child_inode(parent, name, kind); - - let now = SystemTime::now(); - let attr = match &content { - Content::File(f) => { - let size = f.size.to_u64().unwrap_or(0); - FileAttr::RegularFile { - common: self.icache.make_common_file_attr(ino, 0o644, now, now), - size, - blocks: mescloud_icache::blocks_of_size(Self::BLOCK_SIZE, size), - } - } - // TODO(MES-712): return FileAttr::Symlink { target, size } and wire up readlink. - Content::Symlink(s) => { - let size = s.size.to_u64().unwrap_or(0); - FileAttr::RegularFile { - common: self.icache.make_common_file_attr(ino, 0o644, now, now), - size, - blocks: mescloud_icache::blocks_of_size(Self::BLOCK_SIZE, size), - } - } - Content::Dir(_) => FileAttr::Directory { - common: self.icache.make_common_file_attr(ino, 0o755, now, now), - }, - }; - self.icache.cache_attr(ino, attr); - - let rc = self.icache.inc_rc(ino); - trace!(ino, path = ?file_path, rc, "resolved inode"); + .ok_or(LookupError::InodeNotFound)?; + trace!(ino, ?name, rc, "resolved inode"); Ok(attr) } - #[instrument(skip(self), fields(repo = %self.repo_name))] + #[instrument(name = "RepoFs::getattr", skip(self), fields(repo = %self.repo_name))] async fn getattr( &mut self, ino: Inode, _fh: Option, ) -> Result { - self.icache.get_attr(ino).ok_or_else(|| { + self.icache.get_attr(ino).await.ok_or_else(|| { warn!(ino, "getattr on unknown inode"); GetAttrError::InodeNotFound }) } - #[instrument(skip(self), fields(repo = %self.repo_name))] + #[instrument(name = "RepoFs::readdir", skip(self), fields(repo = %self.repo_name))] async fn readdir(&mut self, ino: Inode) -> Result<&[DirEntry], ReadDirError> { debug_assert!( self.icache.contains(ino), @@ -194,47 +319,47 @@ impl Fs for RepoFs { ); debug_assert!( matches!( - self.icache.get_attr(ino), + self.icache.get_attr(ino).await, Some(FileAttr::Directory { .. }) | None ), "readdir: inode {ino} has non-directory cached attr" ); - let file_path = self.path_of_inode(ino); - - let content = self - .client - .org(&self.org_name) - .repos() - .at(&self.repo_name) - .content() - .get(Some(self.ref_.as_str()), file_path.as_deref(), None) - .await - .map_err(MesaApiError::from)?; - - let mesa_entries = match content { - Content::Dir(d) => d.entries, - Content::File(_) | Content::Symlink(_) => return Err(ReadDirError::NotADirectory), - }; + let children = self + .icache + .get_or_resolve(ino, |icb| icb.children.clone()) + .await? + .ok_or(ReadDirError::NotADirectory)?; + + trace!( + ino, + count = children.len(), + "readdir: resolved directory listing from icache" + ); - let collected: Vec<(String, DirEntryType)> = mesa_entries - .into_iter() - .filter_map(|e| { - let (name, kind) = match e { - MesaDirEntry::File(f) => (f.name?, DirEntryType::RegularFile), - // TODO(MES-712): return DirEntryType::Symlink once readlink is wired up. 
- MesaDirEntry::Symlink(s) => (s.name?, DirEntryType::RegularFile), - MesaDirEntry::Dir(d) => (d.name?, DirEntryType::Directory), + self.icache.evict_zero_rc_children(ino).await; + + let mut entries = Vec::with_capacity(children.len()); + for (name, kind) in &children { + let child_ino = self.icache.ensure_child_ino(ino, OsStr::new(name)).await; + // Only cache directory attrs in readdir. File attrs are left as + // None so that lookup triggers the resolver to fetch the real file + // size. Caching placeholder file attrs (size=0) would poison + // needs_resolve(), preventing resolution on subsequent lookups. + if *kind == DirEntryType::Directory { + let now = SystemTime::now(); + let attr = FileAttr::Directory { + common: mescloud_icache::make_common_file_attr( + child_ino, + 0o755, + now, + now, + self.icache.fs_owner(), + self.icache.block_size(), + ), }; - Some((name, kind)) - }) - .collect(); - - trace!(ino, path = ?file_path, count = collected.len(), "fetched directory listing"); - - let mut entries = Vec::with_capacity(collected.len()); - for (name, kind) in &collected { - let (child_ino, _) = self.icache.ensure_child_inode(ino, OsStr::new(name), *kind); + self.icache.cache_attr(child_ino, attr).await; + } entries.push(DirEntry { ino: child_ino, name: name.clone().into(), @@ -242,14 +367,11 @@ impl Fs for RepoFs { }); } - let icb = self - .icache - .get_icb_mut(ino) - .ok_or(ReadDirError::InodeNotFound)?; - Ok(icb.children.insert(entries)) + self.readdir_buf = entries; + Ok(&self.readdir_buf) } - #[instrument(skip(self), fields(repo = %self.repo_name))] + #[instrument(name = "RepoFs::open", skip(self), fields(repo = %self.repo_name))] async fn open(&mut self, ino: Inode, _flags: OpenFlags) -> Result { if !self.icache.contains(ino) { warn!(ino, "open on unknown inode"); @@ -257,12 +379,12 @@ impl Fs for RepoFs { } debug_assert!( matches!( - self.icache.get_attr(ino), + self.icache.get_attr(ino).await, Some(FileAttr::RegularFile { .. }) | None ), "open: inode {ino} has non-file cached attr" ); - let fh = self.icache.allocate_fh(); + let fh = self.file_table.allocate(); self.open_files.insert(fh, ino); trace!(ino, fh, "assigned file handle"); Ok(OpenFile { @@ -271,7 +393,7 @@ impl Fs for RepoFs { }) } - #[instrument(skip(self), fields(repo = %self.repo_name))] + #[instrument(name = "RepoFs::read", skip(self), fields(repo = %self.repo_name))] async fn read( &mut self, ino: Inode, @@ -291,13 +413,19 @@ impl Fs for RepoFs { ); debug_assert!( matches!( - self.icache.get_attr(ino), + self.icache.get_attr(ino).await, Some(FileAttr::RegularFile { .. }) | None ), "read: inode {ino} has non-file cached attr" ); - let file_path = self.path_of_inode(ino); + let file_path = self.path_of_inode(ino).await; + + // Non-root inodes must have a resolvable path. 
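+ // `path_of_inode` only returns `None` for a non-root inode when the parent chain cannot be walked (evicted ancestor, cycle, or non-UTF-8 component).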
+ if ino != Self::ROOT_INO && file_path.is_none() { + warn!(ino, "read: path_of_inode returned None for non-root inode"); + return Err(ReadError::InodeNotFound); + } let content = self .client @@ -327,7 +455,7 @@ impl Fs for RepoFs { Ok(Bytes::copy_from_slice(&decoded[start..end])) } - #[instrument(skip(self), fields(repo = %self.repo_name))] + #[instrument(name = "RepoFs::release", skip(self), fields(repo = %self.repo_name))] async fn release( &mut self, ino: Inode, @@ -347,14 +475,14 @@ impl Fs for RepoFs { Ok(()) } - #[instrument(skip(self), fields(repo = %self.repo_name))] + #[instrument(name = "RepoFs::forget", skip(self), fields(repo = %self.repo_name))] async fn forget(&mut self, ino: Inode, nlookups: u64) { debug_assert!( self.icache.contains(ino), "forget: inode {ino} not in inode table" ); - self.icache.forget(ino, nlookups); + self.icache.forget(ino, nlookups).await; } async fn statfs(&mut self) -> Result { diff --git a/src/fs/mod.rs b/src/fs/mod.rs index ef40322..c68cdee 100644 --- a/src/fs/mod.rs +++ b/src/fs/mod.rs @@ -1,5 +1,6 @@ pub mod fuser; pub mod icache; -pub mod local; +// TODO: re-enable after icache refactoring is complete +// pub mod local; pub mod mescloud; pub mod r#trait; diff --git a/src/trc.rs b/src/trc.rs index 2c1b899..a504362 100644 --- a/src/trc.rs +++ b/src/trc.rs @@ -3,6 +3,10 @@ //! The tracing subscriber is built with a [`reload::Layer`] wrapping the fmt layer so that the //! output format can be switched at runtime (e.g. from pretty mode to ugly mode when daemonizing). +#[cfg(feature = "__otlp_export")] +use opentelemetry::trace::TracerProvider as _; +#[cfg(feature = "__otlp_export")] +use opentelemetry_sdk::Resource; use tracing_indicatif::IndicatifLayer; use tracing_subscriber::{ EnvFilter, Registry, @@ -39,6 +43,19 @@ impl TrcMode { /// A handle that allows reconfiguring the tracing subscriber at runtime. pub struct TrcHandle { fmt_handle: FmtReloadHandle, + #[cfg(feature = "__otlp_export")] + tracer_provider: Option, +} + +#[cfg(feature = "__otlp_export")] +impl Drop for TrcHandle { + fn drop(&mut self) { + if let Some(provider) = self.tracer_provider.take() + && let Err(e) = provider.shutdown() + { + eprintln!("Failed to shutdown OpenTelemetry tracer: {e}"); + } + } } impl TrcHandle { @@ -117,6 +134,8 @@ impl Trc { ); let (reload_layer, fmt_handle) = reload::Layer::new(initial_layer); + #[cfg(feature = "__otlp_export")] + let mut tracer_provider = None; match self.mode { TrcMode::丑 { .. } => { @@ -142,14 +161,54 @@ impl Trc { .try_init()?; } TrcMode::Ugly { .. } => { - // The initial layer is already configured for ugly mode, so just init directly. 
- tracing_subscriber::registry() - .with(reload_layer) - .with(self.env_filter) - .try_init()?; + #[cfg(feature = "__otlp_export")] + { + let exporter = opentelemetry_otlp::SpanExporter::builder() + .with_http() + .build() + .ok(); + + if let Some(exporter) = exporter { + let provider = opentelemetry_sdk::trace::SdkTracerProvider::builder() + .with_batch_exporter(exporter) + .with_resource( + Resource::builder_empty() + .with_service_name("git-fs") + .build(), + ) + .build(); + let tracer = provider.tracer("git-fs"); + let otel_layer = tracing_opentelemetry::layer().with_tracer(tracer); + + tracing_subscriber::registry() + .with(reload_layer) + .with(otel_layer) + .with(self.env_filter) + .try_init()?; + + tracer_provider = Some(provider); + } else { + tracing_subscriber::registry() + .with(reload_layer) + .with(self.env_filter) + .try_init()?; + } + } + + #[cfg(not(feature = "__otlp_export"))] + { + tracing_subscriber::registry() + .with(reload_layer) + .with(self.env_filter) + .try_init()?; + } } } - Ok(TrcHandle { fmt_handle }) + Ok(TrcHandle { + fmt_handle, + #[cfg(feature = "__otlp_export")] + tracer_provider, + }) } } diff --git a/uv.lock b/uv.lock index 717a703..f7de7d0 100644 --- a/uv.lock +++ b/uv.lock @@ -68,6 +68,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e3/26/57c6fb270950d476074c087527a558ccb6f4436657314bfb6cdf484114c4/docker-7.1.0-py3-none-any.whl", hash = "sha256:c96b93b7f0a746f9e77d325bcfb87422a3d8bd4f03136ae8a85b37f1898d5fc0", size = 147774, upload-time = "2024-05-23T11:13:55.01Z" }, ] +[[package]] +name = "execnet" +version = "2.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/bf/89/780e11f9588d9e7128a3f87788354c7946a9cbb1401ad38a48c4db9a4f07/execnet-2.1.2.tar.gz", hash = "sha256:63d83bfdd9a23e35b9c6a3261412324f964c2ec8dcd8d3c6916ee9373e0befcd", size = 166622, upload-time = "2025-11-12T09:56:37.75Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ab/84/02fc1827e8cdded4aa65baef11296a9bbe595c474f0d6d758af082d849fd/execnet-2.1.2-py3-none-any.whl", hash = "sha256:67fba928dd5a544b783f6056f449e5e3931a5c378b128bc18501f7ea79e296ec", size = 40708, upload-time = "2025-11-12T09:56:36.333Z" }, +] + [[package]] name = "git-fs-tests" version = "0.0.0" @@ -79,6 +88,7 @@ dev = [ { name = "pytest" }, { name = "pytest-in-docker" }, { name = "pytest-timeout" }, + { name = "pytest-xdist" }, { name = "ruff" }, ] @@ -88,8 +98,9 @@ dev = [ dev = [ { name = "pyright", specifier = ">=1.1.390" }, { name = "pytest", specifier = ">=9.0.2" }, - { name = "pytest-in-docker", specifier = ">=0.2.0" }, + { name = "pytest-in-docker", specifier = ">=0.2.1" }, { name = "pytest-timeout", specifier = ">=2.4.0" }, + { name = "pytest-xdist", specifier = ">=3.5.0" }, { name = "ruff", specifier = ">=0.9.0" }, ] @@ -190,17 +201,18 @@ wheels = [ [[package]] name = "pytest-in-docker" -version = "0.2.0" +version = "0.2.1" source = { registry = "https://pypi.org/simple" } dependencies = [ { name = "cloudpickle" }, { name = "pytest" }, + { name = "pytest-xdist" }, { name = "rpyc" }, { name = "testcontainers" }, ] -sdist = { url = "https://files.pythonhosted.org/packages/58/a0/c17c7e77c6c10c07036f38a04972dfce62cdddef42ddf9ddfe831a244e78/pytest_in_docker-0.2.0.tar.gz", hash = "sha256:20be61a5669b6c91577079fa7a6b59e7a6cd5a106177f6b3ab8e4b19e63310fb", size = 3793573, upload-time = "2026-02-09T22:26:40.489Z" } +sdist = { url = 
"https://files.pythonhosted.org/packages/f6/ce/dfb5a8cb7dfb317b5cf27bc03c91b9f5b9d0537da893369a78dc8219d0ac/pytest_in_docker-0.2.1.tar.gz", hash = "sha256:5f25ceb12eb98a495c1f9f4764a73dd7e86b1213e6f1324f8f9c222ae34a00b5", size = 3794359, upload-time = "2026-02-09T23:53:08.471Z" } wheels = [ - { url = "https://files.pythonhosted.org/packages/c5/2b/f32da99dfb22cf2af891fe810d9952d600de15a03bb652f67c96cf0a21c3/pytest_in_docker-0.2.0-py3-none-any.whl", hash = "sha256:e793efaa917c5a7f5d730ba2dbd8ffebc77e0aca556dfec958029b8f3fae9f7d", size = 12618, upload-time = "2026-02-09T22:26:38.738Z" }, + { url = "https://files.pythonhosted.org/packages/2f/14/180cf34e1ccb81aef0e683b1d2db3c1e292cf886eb56316b229c6f7e6f3d/pytest_in_docker-0.2.1-py3-none-any.whl", hash = "sha256:a903c3d903985fb6d446eb184a58e6fea725d49f916c18ce2762375b037c85e1", size = 12072, upload-time = "2026-02-09T23:53:06.822Z" }, ] [[package]] @@ -215,6 +227,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/fa/b6/3127540ecdf1464a00e5a01ee60a1b09175f6913f0644ac748494d9c4b21/pytest_timeout-2.4.0-py3-none-any.whl", hash = "sha256:c42667e5cdadb151aeb5b26d114aff6bdf5a907f176a007a30b940d3d865b5c2", size = 14382, upload-time = "2025-05-05T19:44:33.502Z" }, ] +[[package]] +name = "pytest-xdist" +version = "3.8.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "execnet" }, + { name = "pytest" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/78/b4/439b179d1ff526791eb921115fca8e44e596a13efeda518b9d845a619450/pytest_xdist-3.8.0.tar.gz", hash = "sha256:7e578125ec9bc6050861aa93f2d59f1d8d085595d6551c2c90b6f4fad8d3a9f1", size = 88069, upload-time = "2025-07-01T13:30:59.346Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ca/31/d4e37e9e550c2b92a9cbc2e4d0b7420a27224968580b5a447f420847c975/pytest_xdist-3.8.0-py3-none-any.whl", hash = "sha256:202ca578cfeb7370784a8c33d6d05bc6e13b4f25b5053c30a152269fd10f0b88", size = 46396, upload-time = "2025-07-01T13:30:56.632Z" }, +] + [[package]] name = "python-dotenv" version = "1.2.1"