diff --git a/src/adapters/node-process-manager.test.ts b/src/adapters/node-process-manager.test.ts index b4c0f76..847a270 100644 --- a/src/adapters/node-process-manager.test.ts +++ b/src/adapters/node-process-manager.test.ts @@ -152,10 +152,11 @@ describe("NodeProcessManager", () => { handle.kill("SIGTERM"); await handle.exited; - // Grandchild must also be dead (killed via process group) - await vi.waitFor(() => expect(manager.isAlive(grandchildPid)).toBe(false), { - timeout: 2000, - }); + // Grandchild must be dead synchronously after exited resolves — no + // extra waiting. The previous implementation resolved exited as soon + // as the Node wrapper exited, leaving the Go-binary-like grandchild + // alive for a few more seconds (the orphan bug). + expect(manager.isAlive(grandchildPid)).toBe(false); try { unlinkSync(pidFile); diff --git a/src/adapters/node-process-manager.ts b/src/adapters/node-process-manager.ts index 9a2682e..0d266be 100644 --- a/src/adapters/node-process-manager.ts +++ b/src/adapters/node-process-manager.ts @@ -2,6 +2,50 @@ import { spawn as nodeSpawn } from "node:child_process"; import { Readable } from "node:stream"; import type { ProcessHandle, ProcessManager, SpawnOptions } from "../interfaces/process-manager.js"; +/** + * Polls until no process in the group with the given PGID is alive, or until + * timeoutMs elapses. Resolves in both cases — the caller (killProcess) handles + * any remaining stragglers via SIGKILL. + * + * Only meaningful when the child was spawned with detached:true so that + * child.pid === child's PGID (it is the process group leader). + */ +const PROCESS_GROUP_POLL_INTERVAL_MS = 50; + +function waitForProcessGroupDead(pgid: number, timeoutMs = 30_000): Promise { + return new Promise((resolve) => { + const deadline = Date.now() + timeoutMs; + const poll = () => { + try { + process.kill(-pgid, 0); // no-op if group still exists; throws if gone + if (Date.now() >= deadline) { + resolve(); + return; + } + setTimeout(poll, PROCESS_GROUP_POLL_INTERVAL_MS); + } catch (err: unknown) { + const code = (err as NodeJS.ErrnoException).code; + if (code === "ESRCH") { + // Group is gone — all members have exited. + resolve(); + } else if (code === "EPERM") { + // Permission denied: the group still exists but we cannot signal it. + // Keep polling so the killProcess() caller can escalate to SIGKILL. + if (Date.now() >= deadline) { + resolve(); + } else { + setTimeout(poll, PROCESS_GROUP_POLL_INTERVAL_MS); + } + } else { + // Unexpected error — resolve to avoid an infinite loop. + resolve(); + } + } + }; + poll(); + }); +} + /** * Node.js process manager using child_process.spawn. * Requires Node 22+ for Readable.toWeb(). @@ -37,11 +81,18 @@ export class NodeProcessManager implements ProcessManager { ? (Readable.toWeb(child.stderr) as ReadableStream) : null; - // Fabricate the exited Promise from the "exit" event + // Fabricate the exited Promise. It resolves only after the *entire process + // group* is gone — not just the direct child. This matters for wrapper + // binaries (e.g. the opencode Node shim) that use spawnSync internally: + // the direct child exits on SIGTERM while the grandchild (Go binary) is + // still doing its graceful shutdown. Without this extra wait, killProcess() + // returns too early and the grandchild appears as an orphan. const exited = new Promise((resolve) => { child.on("exit", (code, signal) => { - // null code when killed by signal - resolve(signal ? null : (code ?? null)); + const exitCode = signal ? null : (code ?? null); + // After the direct child exits, wait for any remaining members of the + // process group (grandchildren spawned via spawnSync) to also exit. + waitForProcessGroupDead(pid).then(() => resolve(exitCode)); }); child.on("error", () => { resolve(null);