Skip to content

Commit ad9cf28

Browse files
committed
prevent checkpoint creation for resumed batches
1 parent 9a101b3 commit ad9cf28

File tree

1 file changed

+49
-2
lines changed

1 file changed

+49
-2
lines changed

apps/webapp/app/v3/services/createCheckpoint.server.ts

Lines changed: 49 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -93,6 +93,55 @@ export class CreateCheckpointService extends BaseService {
9393
};
9494
}
9595

96+
const { reason } = params;
97+
98+
// Check if we should accept this checkpoint
99+
switch (reason.type) {
100+
case "MANUAL":
101+
// Always accept manual checkpoints
102+
break;
103+
case "WAIT_FOR_DURATION":
104+
// Always accept duration checkpoints
105+
break;
106+
case "WAIT_FOR_TASK": {
107+
// TODO
108+
break;
109+
}
110+
case "WAIT_FOR_BATCH": {
111+
const batchRun = await this._prisma.batchTaskRun.findFirst({
112+
where: {
113+
friendlyId: reason.batchFriendlyId,
114+
},
115+
});
116+
117+
if (!batchRun) {
118+
logger.error("CreateCheckpointService: Batch not found", {
119+
batchFriendlyId: reason.batchFriendlyId,
120+
params,
121+
});
122+
123+
return {
124+
success: false,
125+
keepRunAlive: true,
126+
};
127+
}
128+
129+
if (batchRun.batchVersion === "v3" && batchRun.resumedAt) {
130+
logger.error("CreateCheckpointService: Batch already resumed", {
131+
batchRun,
132+
params,
133+
});
134+
135+
return {
136+
success: false,
137+
keepRunAlive: true,
138+
};
139+
}
140+
141+
break;
142+
}
143+
}
144+
96145
//sleep to test slow checkpoints
97146
// Sleep a random value between 4 and 30 seconds
98147
// await new Promise((resolve) => {
@@ -146,8 +195,6 @@ export class CreateCheckpointService extends BaseService {
146195
},
147196
});
148197

149-
const { reason } = params;
150-
151198
let checkpointEvent: CheckpointRestoreEvent | undefined;
152199

153200
switch (reason.type) {

0 commit comments

Comments
 (0)