diff --git a/Cargo.lock b/Cargo.lock index d0a18dde..130930b3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -959,9 +959,9 @@ dependencies = [ [[package]] name = "shpool_pty" -version = "0.3.1" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "01fbec35414044d1f92bfa2d8eb7faf6e658fb10eae3440e169e5a36c5969ab5" +checksum = "78cc0114fa23588602dbb02375ef5eead7bfcbdc0f1bed9b54c5e0722e197df8" dependencies = [ "errno 0.2.8", "libc", diff --git a/HACKING.md b/HACKING.md index f40fd0ea..66508a0b 100644 --- a/HACKING.md +++ b/HACKING.md @@ -212,3 +212,20 @@ leave log files in place you might run ``` $ SHPOOL_LEAVE_TEST_LOGS=true cargo test --test attach happy_path -- --nocapture ``` + +## Running Tests on macOS + +On macOS, some tests in the `config_watcher` module that rely on file +system events can be flaky when run in parallel. This is because macOS +uses FSEvents for file system notifications, which delivers events +asynchronously and with less predictable timing under concurrent load +compared to Linux's inotify. + +To run the tests reliably on macOS, use single-threaded execution: + +``` +$ cargo test -- --test-threads=1 +``` + +This ensures tests run serially and don't interfere with each other's +file system event delivery. diff --git a/libshpool/Cargo.toml b/libshpool/Cargo.toml index 68e603a5..faba32a8 100644 --- a/libshpool/Cargo.toml +++ b/libshpool/Cargo.toml @@ -28,7 +28,7 @@ serde_json = "1" # JSON output for list command toml = "0.8" # config parsing byteorder = "1" # endianness signal-hook = "0.3" # signal handling -shpool_pty = "0.3.1" # spawning shells in ptys +shpool_pty = "0.3.2" # spawning shells in ptys lazy_static = "1" # globals crossbeam-channel = "0.5" # channels libc = "0.2" # basic libc types diff --git a/libshpool/src/config.rs b/libshpool/src/config.rs index e6d166e2..b86c3fdd 100644 --- a/libshpool/src/config.rs +++ b/libshpool/src/config.rs @@ -15,7 +15,7 @@ use std::{ borrow::Cow, collections::HashMap, - env, fs, + fs, path::{Path, PathBuf}, sync::{Arc, RwLock, RwLockReadGuard}, }; @@ -155,7 +155,7 @@ impl Manager { #[cfg(not(any(target_os = "macos", target_os = "ios", target_os = "windows")))] fn config_base_dir() -> anyhow::Result { - match env::var("XDG_CONFIG_DIR") { + match std::env::var("XDG_CONFIG_DIR") { Ok(v) => Ok(PathBuf::from(v)), Err(_) => { let user_info = user::info().context("getting user info")?; diff --git a/libshpool/src/config_watcher.rs b/libshpool/src/config_watcher.rs index 16434200..232671bd 100644 --- a/libshpool/src/config_watcher.rs +++ b/libshpool/src/config_watcher.rs @@ -15,7 +15,7 @@ use anyhow::{anyhow, Context as _, Result}; use crossbeam_channel::{bounded, select, unbounded, Receiver, Sender}; use notify::{ - event::ModifyKind, recommended_watcher, Event, EventKind, INotifyWatcher, RecursiveMode, + event::ModifyKind, recommended_watcher, Event, EventKind, RecommendedWatcher, RecursiveMode, Watcher as _, }; use std::{ @@ -28,10 +28,36 @@ use tracing::{debug, error, instrument, warn}; use crate::test_hooks; +/// Canonicalize a path, resolving symlinks in the existing portion. +/// +/// This is needed because file system watchers (inotify, FSEvents, etc.) report +/// canonical paths, so we need to store canonical paths for comparison. +/// Unlike `std::fs::canonicalize`, this handles paths where the final +/// components don't exist yet by canonicalizing the longest existing prefix. +fn canonicalize_path(path: &Path) -> PathBuf { + // Try to canonicalize the whole path first + if let Ok(canonical) = path.canonicalize() { + return canonical; + } + + // Find the longest existing ancestor and canonicalize that + for ancestor in path.ancestors().skip(1) { + if let Ok(canonical_ancestor) = ancestor.canonicalize() { + // Append the remaining (non-existent) components + if let Ok(remaining) = path.strip_prefix(ancestor) { + return canonical_ancestor.join(remaining); + } + } + } + + // Fallback to original path if nothing could be canonicalized + path.to_path_buf() +} + /// Watches on `path`, returnes the watched path, which is the closest existing /// ancestor of `path`, and the immediate child that is of interest. pub fn best_effort_watch<'a>( - watcher: &mut INotifyWatcher, + watcher: &mut RecommendedWatcher, path: &'a Path, ) -> Result<(&'a Path, Option<&'a Path>)> { let mut watched_path = Err(anyhow!("empty path")); @@ -206,7 +232,7 @@ struct ConfigWatcherInner { handler: Handler, /// underlying notify-rs watcher - watcher: INotifyWatcher, + watcher: RecommendedWatcher, /// receiving notify events notify_rx: Receiver>, @@ -273,8 +299,11 @@ impl ConfigWatcherInner { return Outcome::Timeout; } - // nothing ready to act immediately, notify debug_tx - self.debug_tx.send(()).unwrap(); + // Only signal idle if there's no pending reload deadline. + // If there's a pending deadline, we have work to do (wait for timeout). + if self.reload_deadline.is_none() { + self.debug_tx.send(()).unwrap(); + } } // finally blocking wait @@ -299,7 +328,8 @@ impl ConfigWatcherInner { /// Handle add watch command from `ConfigWatcher`. fn add_watch_by_command(&mut self, path: PathBuf) -> Result<()> { - match self.paths.entry(path) { + let canonical_path = canonicalize_path(&path); + match self.paths.entry(canonical_path) { Entry::Occupied(e) => Err(anyhow!("{} is already being watched", e.key().display())), e @ Entry::Vacant(_) => { let reload = watch_and_add(&mut self.watcher, e)?; @@ -430,13 +460,16 @@ fn handle_event(event: Event, paths: &HashMap) -> ( /// failed. Note that this will overwrite any existing state. /// Return whether reload is needed. fn watch_and_add( - watcher: &mut INotifyWatcher, + watcher: &mut RecommendedWatcher, entry: Entry, ) -> Result { // make a version of watch path that doesn't retain a borrow in its return value - let best_effort_watch_owned = |watcher: &mut INotifyWatcher, path: &Path| { - best_effort_watch(watcher, path) - .map(|(w, ic)| (w.to_owned(), w.join(ic.unwrap_or_else(|| Path::new(""))))) + let best_effort_watch_owned = |watcher: &mut RecommendedWatcher, path: &Path| { + best_effort_watch(watcher, path).map(|(w, ic)| { + let watched = w.canonicalize().unwrap_or_else(|_| w.to_path_buf()); + let immediate = watched.join(ic.unwrap_or_else(|| Path::new(""))); + (watched, immediate) + }) }; match best_effort_watch_owned(watcher, entry.key()) { Ok((watched_path, immediate_child_path)) => { @@ -676,7 +709,6 @@ mod test { // Wait for watcher to do its work and drop the watcher to close the channel fn drop_watcher(watcher: ConfigWatcher) { - // sleep time larger than 1 debounce time thread::sleep(DEBOUNCE_TIME * 2); watcher.worker_ready(); } @@ -689,9 +721,8 @@ mod test { fs::create_dir_all(state.target_path.parent().unwrap()).unwrap(); state.watcher.worker_ready(); + // Write twice in quick succession - both should be within debounce window fs::write(&state.target_path, "test").unwrap(); - - state.watcher.worker_ready(); fs::write(&state.target_path, "another").unwrap(); drop_watcher(state.watcher); @@ -736,6 +767,44 @@ mod test { drop_watcher(state.watcher); let reloads: Vec<_> = state.rx.into_iter().collect(); - assert_eq!(reloads.len(), 1); + assert_eq!(reloads.len(), 1, "expected 1 reload, got {}", reloads.len()); + } + + /// Regression test: ConfigWatcher should resolve symlinks in watched paths. + /// + /// File system watchers (inotify on Linux, FSEvents on macOS) report + /// canonical paths. If we watch through a symlink, we need to canonicalize + /// the stored path to match events we receive. Without this, events are + /// missed because the symlinked path doesn't match the canonical event path. + /// + /// This commonly manifests on macOS where /var -> /private/var, but affects + /// any platform when symlinks are in the watched path. + #[test] + #[timeout(30000)] + fn symlink_path_is_canonicalized() { + use std::os::unix::fs::symlink; + + let tmpdir = tempfile::tempdir().unwrap(); + + // setup: real dir + symlink to it + let real_dir = tmpdir.path().join("real"); + fs::create_dir_all(&real_dir).unwrap(); + let link_dir = tmpdir.path().join("link"); + symlink(&real_dir, &link_dir).unwrap(); + + // watch through the symlink + let symlinked_target = link_dir.join("config.toml"); + let (tx, rx) = unbounded(); + let watcher = + ConfigWatcher::with_debounce(move || tx.send(()).unwrap(), DEBOUNCE_TIME).unwrap(); + watcher.watch(&symlinked_target).unwrap(); + + watcher.worker_ready(); + fs::write(&symlinked_target, "test content").unwrap(); + + drop_watcher(watcher); + + let reloads: Vec<_> = rx.into_iter().collect(); + assert_eq!(reloads.len(), 1, "expected 1 reload, got {}", reloads.len()); } } diff --git a/libshpool/src/daemon/prompt.rs b/libshpool/src/daemon/prompt.rs index 561ab8b5..124e8fc9 100644 --- a/libshpool/src/daemon/prompt.rs +++ b/libshpool/src/daemon/prompt.rs @@ -107,9 +107,11 @@ pub fn maybe_inject_prefix( // this rather than `echo $PROMPT_SENTINEL` because different // shells have subtly different echo behavior which makes it // hard to make the scanner work right. - // TODO(julien): this will probably not work on mac - let sentinel_cmd = - format!("\n {}=prompt /proc/{}/exe daemon\n", SENTINEL_FLAG_VAR, std::process::id()); + let exe_path = std::env::current_exe() + .context("getting current exe path")? + .to_string_lossy() + .into_owned(); + let sentinel_cmd = format!("\n {}=prompt {} daemon\n", SENTINEL_FLAG_VAR, exe_path); script.push_str(sentinel_cmd.as_str()); debug!("injecting prefix script '{}'", script); @@ -121,8 +123,11 @@ pub fn maybe_inject_prefix( #[instrument(skip_all)] fn wait_for_startup(pty_master: &mut shpool_pty::fork::Master) -> anyhow::Result<()> { let mut startup_sentinel_scanner = SentinelScanner::new(STARTUP_SENTINEL); - let startup_sentinel_cmd = - format!("\n {}=startup /proc/{}/exe daemon\n", SENTINEL_FLAG_VAR, std::process::id()); + let exe_path = std::env::current_exe() + .context("getting current exe path")? + .to_string_lossy() + .into_owned(); + let startup_sentinel_cmd = format!("\n {}=startup {} daemon\n", SENTINEL_FLAG_VAR, exe_path); pty_master .write_all(startup_sentinel_cmd.as_bytes()) diff --git a/libshpool/src/daemon/server.rs b/libshpool/src/daemon/server.rs index 19aa860e..850748e9 100644 --- a/libshpool/src/daemon/server.rs +++ b/libshpool/src/daemon/server.rs @@ -144,9 +144,21 @@ impl Server { #[instrument(skip_all, fields(cid = conn_id))] fn handle_conn(&self, mut stream: UnixStream, conn_id: usize) -> anyhow::Result<()> { // We want to avoid timing out while blocking the main thread. + // On macOS, set_read_timeout returns EINVAL if the peer has already + // closed (e.g., a daemon presence probe). This is documented in the + // macOS setsockopt(2) man page. Treat this the same as a broken pipe. + #[cfg(target_os = "macos")] + if let Err(e) = stream.set_read_timeout(Some(consts::SOCK_STREAM_TIMEOUT)) { + if e.raw_os_error() == Some(libc::EINVAL) { + info!("EINVAL setting read timeout, peer already closed (presence probe)"); + return Ok(()); + } + return Err(e).context("setting read timeout on inbound session"); + } + #[cfg(not(target_os = "macos"))] stream .set_read_timeout(Some(consts::SOCK_STREAM_TIMEOUT)) - .context("setting read timout on inbound session")?; + .context("setting read timeout on inbound session")?; // advertize our protocol version to the client so that it can // warn about mismatches @@ -1138,12 +1150,14 @@ where protocol::encode_to(&header, serializeable_stream).context("writing reply")?; stream.set_write_timeout(None).context("unsetting write timout on inbound session")?; + Ok(()) } /// check_peer makes sure that a process dialing in on the shpool /// control socket has the same UID as the current user and that /// both have the same executable path. +#[cfg(target_os = "linux")] fn check_peer(sock: &UnixStream) -> anyhow::Result<()> { use nix::sys::socket; @@ -1166,11 +1180,69 @@ fn check_peer(sock: &UnixStream) -> anyhow::Result<()> { Ok(()) } +#[cfg(target_os = "macos")] +fn check_peer(sock: &UnixStream) -> anyhow::Result<()> { + use std::os::unix::io::AsRawFd; + + let mut peer_uid: libc::uid_t = 0; + let mut peer_gid: libc::gid_t = 0; + unsafe { + if libc::getpeereid(sock.as_raw_fd(), &mut peer_uid, &mut peer_gid) != 0 { + return Err(anyhow!( + "could not get peer uid from socket: {}", + io::Error::last_os_error() + )); + } + } + let peer_uid = unistd::Uid::from_raw(peer_uid); + let self_uid = unistd::Uid::current(); + if peer_uid != self_uid { + return Err(anyhow!("shpool prohibits connections across users")); + } + + let mut peer_pid: libc::pid_t = 0; + let mut len = std::mem::size_of::() as libc::socklen_t; + unsafe { + if libc::getsockopt( + sock.as_raw_fd(), + libc::SOL_LOCAL, + libc::LOCAL_PEERPID, + &mut peer_pid as *mut _ as *mut libc::c_void, + &mut len, + ) != 0 + { + return Err(anyhow!( + "could not get peer pid from socket: {}", + io::Error::last_os_error() + )); + } + } + + let peer_pid = unistd::Pid::from_raw(peer_pid); + let self_pid = unistd::Pid::this(); + let peer_exe = exe_for_pid(peer_pid).context("could not resolve exe from the pid")?; + let self_exe = exe_for_pid(self_pid).context("could not resolve our own exe")?; + if peer_exe != self_exe { + warn!("attach binary differs from daemon binary"); + } + + Ok(()) +} + +#[cfg(target_os = "linux")] fn exe_for_pid(pid: unistd::Pid) -> anyhow::Result { let path = std::fs::read_link(format!("/proc/{pid}/exe"))?; Ok(path) } +#[cfg(target_os = "macos")] +fn exe_for_pid(pid: unistd::Pid) -> anyhow::Result { + use libproc::proc_pid::pidpath; + let path = pidpath(pid.as_raw()) + .map_err(|e| anyhow!("could not get exe path for pid {}: {:?}", pid, e))?; + Ok(PathBuf::from(path)) +} + #[derive(Debug, Clone, PartialEq, Eq)] pub enum ShellSelectionError { BusyShellSession, diff --git a/libshpool/src/user.rs b/libshpool/src/user.rs index f06613d2..8b59202c 100644 --- a/libshpool/src/user.rs +++ b/libshpool/src/user.rs @@ -33,6 +33,12 @@ pub fn info() -> anyhow::Result { pw_gecos: ptr::null_mut(), pw_dir: ptr::null_mut(), pw_shell: ptr::null_mut(), + #[cfg(any(target_os = "macos", target_os = "ios", target_os = "freebsd", target_os = "netbsd", target_os = "openbsd"))] + pw_change: 0, + #[cfg(any(target_os = "macos", target_os = "ios", target_os = "freebsd", target_os = "netbsd", target_os = "openbsd"))] + pw_class: ptr::null_mut(), + #[cfg(any(target_os = "macos", target_os = "ios", target_os = "freebsd", target_os = "netbsd", target_os = "openbsd"))] + pw_expire: 0, }; let mut passwd_res_ptr: *mut libc::passwd = ptr::null_mut(); unsafe { diff --git a/shpool/Cargo.toml b/shpool/Cargo.toml index 834898e6..2cb0b7ea 100644 --- a/shpool/Cargo.toml +++ b/shpool/Cargo.toml @@ -15,6 +15,9 @@ rust-version = "1.74" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[features] +test_hooks = ["libshpool/test_hooks"] + [dependencies] clap = { version = "4", features = ["derive"] } # cli parsing anyhow = "1" # dynamic, unstructured errors diff --git a/shpool/tests/support/daemon.rs b/shpool/tests/support/daemon.rs index 17cb3236..ee6e0c7f 100644 --- a/shpool/tests/support/daemon.rs +++ b/shpool/tests/support/daemon.rs @@ -1,3 +1,6 @@ +// NOTE: Socket paths in this module are kept short to stay under sun_path +// limits (~104-108 bytes). macOS compounds this with long temp prefixes. + use std::{ default::Default, env, @@ -110,7 +113,7 @@ impl Proc { let tmp_dir = local_tmp_dir.path().to_path_buf(); let socket_path = tmp_dir.join("shpool.socket"); - let test_hook_socket_path = tmp_dir.join("shpool-daemon-test-hook.socket"); + let test_hook_socket_path = tmp_dir.join("hook.sock"); let log_file = tmp_dir.join("daemon.log"); eprintln!("spawning daemon proc with log {:?}", &log_file); @@ -279,7 +282,7 @@ impl Proc { pub fn attach(&mut self, name: &str, args: AttachArgs) -> anyhow::Result { let log_file = self.tmp_dir.join(format!("attach_{}_{}.log", name, self.subproc_counter)); let test_hook_socket_path = - self.tmp_dir.join(format!("attach_test_hook_{}_{}.socket", name, self.subproc_counter)); + self.tmp_dir.join(format!("ah{}_{}.sock", name, self.subproc_counter)); eprintln!("spawning attach proc with log {:?}", &log_file); self.subproc_counter += 1; @@ -301,6 +304,14 @@ impl Proc { if let Ok(xdg_runtime_dir) = env::var("XDG_RUNTIME_DIR") { cmd.env("XDG_RUNTIME_DIR", xdg_runtime_dir); } + // HOME is needed on macOS (where XDG_RUNTIME_DIR is typically unset) + if let Ok(home) = env::var("HOME") { + cmd.env("HOME", home); + } + // PATH is needed for the shell subprocess + if let Ok(path) = env::var("PATH") { + cmd.env("PATH", path); + } if args.force { cmd.arg("-f"); }