Skip to content

Commit 4332743

Browse files
committed
feat(supervisor): add flag to enable compute snapshots
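Adds a COMPUTE_SNAPSHOTS_ENABLED flag that gates snapshot/restore behaviour independently of compute mode. When the flag is disabled, VMs don't receive the metadata URL, and suspend and restore are no-ops. It defaults to off so that compute mode can be used without snapshots. This commit also logs the restore request body at debug level.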
1 parent 0a6d6f1 commit 4332743

File tree

4 files changed

+14
-9
lines changed

4 files changed

+14
-9
lines changed

apps/supervisor/src/env.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -81,6 +81,7 @@ const Env = z.object({
8181
COMPUTE_GATEWAY_URL: z.string().url().optional(),
8282
COMPUTE_GATEWAY_AUTH_TOKEN: z.string().optional(),
8383
COMPUTE_GATEWAY_TIMEOUT_MS: z.coerce.number().int().default(30_000),
84+
COMPUTE_SNAPSHOTS_ENABLED: BoolEnv.default(false),
8485

8586
// Kubernetes settings
8687
KUBERNETES_FORCE_ENABLED: BoolEnv.default(false),

apps/supervisor/src/index.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -223,7 +223,7 @@ class ManagedSupervisor {
223223
if (checkpoint) {
224224
this.logger.log("Restoring run", { runId: message.run.id });
225225

226-
if (this.isComputeMode && this.computeManager) {
226+
if (this.isComputeMode && this.computeManager && env.COMPUTE_SNAPSHOTS_ENABLED) {
227227
try {
228228
// Derive runnerId unique per restore cycle (matches iceman's pattern)
229229
const runIdShort = message.run.friendlyId.replace("run_", "");

apps/supervisor/src/workloadManager/compute.ts

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ export class ComputeWorkloadManager implements WorkloadManager {
5151
envVars.TRIGGER_WARM_START_URL = this.opts.warmStartUrl;
5252
}
5353

54-
if (this.opts.metadataUrl) {
54+
if (env.COMPUTE_SNAPSHOTS_ENABLED && this.opts.metadataUrl) {
5555
envVars.TRIGGER_METADATA_URL = this.opts.metadataUrl;
5656
}
5757

@@ -266,17 +266,21 @@ export class ComputeWorkloadManager implements WorkloadManager {
266266
TRIGGER_WORKER_INSTANCE_NAME: env.TRIGGER_WORKER_INSTANCE_NAME,
267267
};
268268

269+
const body = {
270+
name: opts.runnerId,
271+
metadata,
272+
cpu: opts.machine.cpu,
273+
memory_mb: opts.machine.memory * 1024,
274+
};
275+
276+
this.logger.debug("restore request body", { url, body });
277+
269278
const [error, response] = await tryCatch(
270279
fetch(url, {
271280
method: "POST",
272281
headers: this.authHeaders,
273282
signal: AbortSignal.timeout(this.opts.gatewayTimeoutMs),
274-
body: JSON.stringify({
275-
name: opts.runnerId,
276-
metadata,
277-
cpu: opts.machine.cpu,
278-
memory_mb: opts.machine.memory * 1024,
279-
}),
283+
body: JSON.stringify(body),
280284
})
281285
);
282286

apps/supervisor/src/workloadServer/index.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -263,7 +263,7 @@ export class WorkloadServer extends EventEmitter<WorkloadServerEvents> {
263263
return;
264264
}
265265

266-
if (this.computeManager) {
266+
if (this.computeManager && env.COMPUTE_SNAPSHOTS_ENABLED) {
267267
// Compute mode: fire-and-forget snapshot with callback
268268
reply.json({ ok: true } satisfies WorkloadSuspendRunResponseBody, false, 202);
269269

0 commit comments

Comments
 (0)