From 8a26f008242999d2003dd5106fb7ae086434c09c Mon Sep 17 00:00:00 2001 From: Teng Lin Date: Wed, 25 Feb 2026 01:08:57 -0500 Subject: [PATCH 1/2] fix: wait for entire process group before resolving exited promise MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR #135 fixed signal delivery (process group kill reaches the Go binary) but missed the timing issue: proc.exited resolved as soon as the Node wrapper exited — nearly instantly after SIGTERM because Node's default handler terminates synchronously. The Go binary (the actual opencode serve server) received SIGTERM too but spent another 1–5 seconds on its graceful HTTP shutdown, so killProcess() returned while the Go binary was still alive and port-bound → observable orphan. Fix: add waitForProcessGroupDead() which polls process.kill(-pgid, 0) every 50 ms until the process group has no more members (ESRCH). The exited promise now chains through this poll after child.on("exit") fires, so callers get a true "everything in the group is dead" signal. If the Go binary outlasts killGracePeriodMs, the existing SIGKILL escalation kills the group and the poll sees ESRCH immediately. Update the grandchild unit test to assert the grandchild is dead synchronously after awaiting exited (no vi.waitFor safety net), which would have caught this bug. --- src/adapters/node-process-manager.test.ts | 9 ++--- src/adapters/node-process-manager.ts | 41 +++++++++++++++++++++-- 2 files changed, 43 insertions(+), 7 deletions(-) diff --git a/src/adapters/node-process-manager.test.ts b/src/adapters/node-process-manager.test.ts index b4c0f76..847a270 100644 --- a/src/adapters/node-process-manager.test.ts +++ b/src/adapters/node-process-manager.test.ts @@ -152,10 +152,11 @@ describe("NodeProcessManager", () => { handle.kill("SIGTERM"); await handle.exited; - // Grandchild must also be dead (killed via process group) - await vi.waitFor(() => expect(manager.isAlive(grandchildPid)).toBe(false), { - timeout: 2000, - }); + // Grandchild must be dead synchronously after exited resolves — no + // extra waiting. The previous implementation resolved exited as soon + // as the Node wrapper exited, leaving the Go-binary-like grandchild + // alive for a few more seconds (the orphan bug). + expect(manager.isAlive(grandchildPid)).toBe(false); try { unlinkSync(pidFile); diff --git a/src/adapters/node-process-manager.ts b/src/adapters/node-process-manager.ts index 9a2682e..58605a5 100644 --- a/src/adapters/node-process-manager.ts +++ b/src/adapters/node-process-manager.ts @@ -2,6 +2,34 @@ import { spawn as nodeSpawn } from "node:child_process"; import { Readable } from "node:stream"; import type { ProcessHandle, ProcessManager, SpawnOptions } from "../interfaces/process-manager.js"; +/** + * Polls until no process in the group with the given PGID is alive, or until + * timeoutMs elapses. Resolves in both cases — the caller (killProcess) handles + * any remaining stragglers via SIGKILL. + * + * Only meaningful when the child was spawned with detached:true so that + * child.pid === child's PGID (it is the process group leader). + */ +function waitForProcessGroupDead(pgid: number, timeoutMs = 30_000): Promise { + return new Promise((resolve) => { + const deadline = Date.now() + timeoutMs; + const poll = () => { + try { + process.kill(-pgid, 0); // no-op if group still exists; throws if gone + if (Date.now() >= deadline) { + resolve(); + return; + } + setTimeout(poll, 50); + } catch { + // ESRCH: group is gone; EPERM: can't signal (treat as gone) + resolve(); + } + }; + poll(); + }); +} + /** * Node.js process manager using child_process.spawn. * Requires Node 22+ for Readable.toWeb(). @@ -37,11 +65,18 @@ export class NodeProcessManager implements ProcessManager { ? (Readable.toWeb(child.stderr) as ReadableStream) : null; - // Fabricate the exited Promise from the "exit" event + // Fabricate the exited Promise. It resolves only after the *entire process + // group* is gone — not just the direct child. This matters for wrapper + // binaries (e.g. the opencode Node shim) that use spawnSync internally: + // the direct child exits on SIGTERM while the grandchild (Go binary) is + // still doing its graceful shutdown. Without this extra wait, killProcess() + // returns too early and the grandchild appears as an orphan. const exited = new Promise((resolve) => { child.on("exit", (code, signal) => { - // null code when killed by signal - resolve(signal ? null : (code ?? null)); + const exitCode = signal ? null : (code ?? null); + // After the direct child exits, wait for any remaining members of the + // process group (grandchildren spawned via spawnSync) to also exit. + waitForProcessGroupDead(pid).then(() => resolve(exitCode)); }); child.on("error", () => { resolve(null); From 03de39eba3c5600086d66b30b109453ca5cb5331 Mon Sep 17 00:00:00 2001 From: Teng Lin Date: Wed, 25 Feb 2026 01:12:30 -0500 Subject: [PATCH 2/2] =?UTF-8?q?fix:=20address=20review=20=E2=80=94=20separ?= =?UTF-8?q?ate=20EPERM=20from=20ESRCH=20and=20extract=20poll=20interval=20?= =?UTF-8?q?constant?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- src/adapters/node-process-manager.ts | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/src/adapters/node-process-manager.ts b/src/adapters/node-process-manager.ts index 58605a5..0d266be 100644 --- a/src/adapters/node-process-manager.ts +++ b/src/adapters/node-process-manager.ts @@ -10,6 +10,8 @@ import type { ProcessHandle, ProcessManager, SpawnOptions } from "../interfaces/ * Only meaningful when the child was spawned with detached:true so that * child.pid === child's PGID (it is the process group leader). */ +const PROCESS_GROUP_POLL_INTERVAL_MS = 50; + function waitForProcessGroupDead(pgid: number, timeoutMs = 30_000): Promise { return new Promise((resolve) => { const deadline = Date.now() + timeoutMs; @@ -20,10 +22,24 @@ function waitForProcessGroupDead(pgid: number, timeoutMs = 30_000): Promise= deadline) { + resolve(); + } else { + setTimeout(poll, PROCESS_GROUP_POLL_INTERVAL_MS); + } + } else { + // Unexpected error — resolve to avoid an infinite loop. + resolve(); + } } }; poll();