@@ -100,15 +100,15 @@ export type RunQueueOptions = {
100100 pollIntervalMs ?: number ;
101101 /** Max number of runs to expire per poll per shard (default: 100) */
102102 batchSize ?: number ;
103- /** Callback to handle expired runs */
104- callback : TtlSystemCallback ;
103+ /** Key suffix for TTL worker's queue sorted set (relative to RunQueue keyPrefix) */
104+ workerQueueSuffix : string ;
105+ /** Key suffix for TTL worker's items hash (relative to RunQueue keyPrefix) */
106+ workerItemsSuffix : string ;
107+ /** Visibility timeout for TTL worker jobs (ms, default: 30000) */
108+ visibilityTimeoutMs ?: number ;
105109 } ;
106110} ;
107111
108- export interface TtlSystemCallback {
109- ( runs : Array < { queueKey : string ; runId : string ; orgId : string } > ) : Promise < void > ;
110- }
111-
112112export interface ConcurrencySweeperCallback {
113113 ( runIds : string [ ] ) : Promise < Array < { id : string ; orgId : string } > > ;
114114}
@@ -1289,19 +1289,7 @@ export class RunQueue {
12891289 shard,
12901290 count : expiredRuns . length ,
12911291 } ) ;
1292-
1293- // Call the callback with expired runs
1294- try {
1295- await this . options . ttlSystem ! . callback ( expiredRuns ) ;
1296- processedCount += expiredRuns . length ;
1297- } catch ( callbackError ) {
1298- this . logger . error ( `TTL callback failed for shard ${ shard } ` , {
1299- error : callbackError ,
1300- service : this . name ,
1301- shard,
1302- runCount : expiredRuns . length ,
1303- } ) ;
1304- }
1292+ processedCount += expiredRuns . length ;
13051293 }
13061294 }
13071295 } catch ( error ) {
@@ -1318,24 +1306,36 @@ export class RunQueue {
13181306 }
13191307
13201308 /**
1321- * Atomically expire TTL runs: removes from TTL set AND acknowledges from normal queue.
1322- * This prevents race conditions with the normal dequeue system .
1309+ * Atomically expire TTL runs: removes from TTL set, acknowledges from normal queue,
1310+ * and enqueues each run to the TTL worker for DB updates .
13231311 */
13241312 async #expireTtlRuns(
13251313 shard : number ,
13261314 now : number ,
13271315 batchSize : number
13281316 ) : Promise < Array < { queueKey : string ; runId : string ; orgId : string } > > {
1329- const shardCount = this . options . ttlSystem ?. shardCount ?? this . shardCount ;
1317+ const ttlSystem = this . options . ttlSystem ;
1318+ if ( ! ttlSystem ) {
1319+ return [ ] ;
1320+ }
1321+
1322+ const shardCount = ttlSystem . shardCount ?? this . shardCount ;
13301323 const ttlQueueKey = this . keys . ttlQueueKeyForShard ( shard ) ;
1324+ const keyPrefix = this . options . redis . keyPrefix ?? "" ;
1325+ const workerQueueKey = keyPrefix + ttlSystem . workerQueueSuffix ;
1326+ const workerItemsKey = keyPrefix + ttlSystem . workerItemsSuffix ;
1327+ const visibilityTimeoutMs = ( ttlSystem . visibilityTimeoutMs ?? 30_000 ) . toString ( ) ;
13311328
1332- // Atomically get and remove expired runs from TTL set, and ack them from normal queues
1329+ // Atomically get and remove expired runs from TTL set, ack them from normal queues, and enqueue to TTL worker
13331330 const results = await this . redis . expireTtlRuns (
13341331 ttlQueueKey ,
1335- this . options . redis . keyPrefix ?? "" ,
1332+ keyPrefix ,
13361333 now . toString ( ) ,
13371334 batchSize . toString ( ) ,
1338- shardCount . toString ( )
1335+ shardCount . toString ( ) ,
1336+ workerQueueKey ,
1337+ workerItemsKey ,
1338+ visibilityTimeoutMs
13391339 ) ;
13401340
13411341 if ( ! results || results . length === 0 ) {
@@ -2587,7 +2587,7 @@ redis.call('SREM', envCurrentDequeuedKey, messageId)
25872587 ` ,
25882588 } ) ;
25892589
2590- // Expire TTL runs - atomically removes from TTL set and acknowledges from normal queue
2590+ // Expire TTL runs - atomically removes from TTL set, acknowledges from normal queue, and enqueues to TTL worker
25912591 this . redis . defineCommand ( "expireTtlRuns" , {
25922592 numberOfKeys : 1 ,
25932593 lua : `
@@ -2596,6 +2596,9 @@ local keyPrefix = ARGV[1]
25962596local currentTime = tonumber(ARGV[2])
25972597local batchSize = tonumber(ARGV[3])
25982598local shardCount = tonumber(ARGV[4])
2599+ local workerQueueKey = ARGV[5]
2600+ local workerItemsKey = ARGV[6]
2601+ local visibilityTimeoutMs = tonumber(ARGV[7])
25992602
26002603-- Get expired runs from TTL sorted set (score <= currentTime)
26012604local expiredMembers = redis.call('ZRANGEBYSCORE', ttlQueueKey, '-inf', currentTime, 'LIMIT', 0, batchSize)
@@ -2604,6 +2607,9 @@ if #expiredMembers == 0 then
26042607 return {}
26052608end
26062609
2610+ local time = redis.call('TIME')
2611+ local nowMs = tonumber(time[1]) * 1000 + math.floor(tonumber(time[2]) / 1000)
2612+
26072613local results = {}
26082614
26092615for i, member in ipairs(expiredMembers) do
@@ -2656,6 +2662,16 @@ for i, member in ipairs(expiredMembers) do
26562662 redis.call('SREM', envConcurrencyKey, runId)
26572663 redis.call('SREM', envDequeuedKey, runId)
26582664
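2665+ -- Enqueue to TTL worker; runId is both the sorted-set member and the hash field, so re-enqueueing the same run overwrites its entry instead of adding a duplicate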
2666+ local serializedItem = cjson.encode({
2667+ job = "expireTtlRun",
2668+ item = { runId = runId, orgId = orgId, queueKey = rawQueueKey },
2669+ visibilityTimeoutMs = visibilityTimeoutMs,
2670+ attempt = 0
2671+ })
2672+ redis.call('ZADD', workerQueueKey, nowMs, runId)
2673+ redis.call('HSET', workerItemsKey, runId, serializedItem)
2674+
26592675 -- Add to results
26602676 table.insert(results, member)
26612677 end
@@ -3151,6 +3167,9 @@ declare module "@internal/redis" {
31513167 currentTime : string ,
31523168 batchSize : string ,
31533169 shardCount : string ,
3170+ workerQueueKey : string ,
3171+ workerItemsKey : string ,
3172+ visibilityTimeoutMs : string ,
31543173 callback ?: Callback < string [ ] >
31553174 ) : Result < string [ ] , Context > ;
31563175
0 commit comments