Skip to content

Commit 71f7e4e

Browse files
authored
Fix compute_next_delay returning None when max_retry_interval is not set (#116)
1 parent 29bbc9a commit 71f7e4e

File tree

2 files changed

+156
-1
lines changed

2 files changed

+156
-1
lines changed

durabletask/task.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -468,7 +468,8 @@ def compute_next_delay(self) -> Optional[timedelta]:
468468

469469
if self._retry_policy.max_retry_interval is not None:
470470
next_delay_f = min(next_delay_f, self._retry_policy.max_retry_interval.total_seconds())
471-
return timedelta(seconds=next_delay_f)
471+
472+
return timedelta(seconds=next_delay_f)
472473

473474
return None
474475

tests/durabletask/test_orchestration_executor.py

Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -410,6 +410,160 @@ def orchestrator(ctx: task.OrchestrationContext, orchestrator_input):
410410
assert actions[-1].id == 7
411411

412412

413+
def test_activity_retry_without_max_retry_interval():
414+
"""Tests that retry logic works correctly when max_retry_interval is not set.
415+
416+
This is a regression test for a bug where compute_next_delay() returned None
417+
instead of the computed delay when max_retry_interval was not specified,
418+
causing retries to silently fail.
419+
"""
420+
421+
def dummy_activity(ctx, _):
422+
raise ValueError("Kah-BOOOOM!!!")
423+
424+
def orchestrator(ctx: task.OrchestrationContext, orchestrator_input):
425+
result = yield ctx.call_activity(
426+
dummy_activity,
427+
retry_policy=task.RetryPolicy(
428+
first_retry_interval=timedelta(seconds=1),
429+
max_number_of_attempts=3,
430+
backoff_coefficient=2),
431+
input=orchestrator_input)
432+
return result
433+
434+
registry = worker._Registry()
435+
name = registry.add_orchestrator(orchestrator)
436+
437+
current_timestamp = datetime.utcnow()
438+
439+
# Simulate the task failing for the first time — retry timer should be created at 1 second
440+
old_events = [
441+
helpers.new_orchestrator_started_event(timestamp=current_timestamp),
442+
helpers.new_execution_started_event(name, TEST_INSTANCE_ID, encoded_input=None),
443+
helpers.new_task_scheduled_event(1, task.get_name(dummy_activity))]
444+
expected_fire_at = current_timestamp + timedelta(seconds=1)
445+
446+
new_events = [
447+
helpers.new_orchestrator_started_event(timestamp=current_timestamp),
448+
helpers.new_task_failed_event(1, ValueError("Kah-BOOOOM!!!"))]
449+
executor = worker._OrchestrationExecutor(registry, TEST_LOGGER)
450+
result = executor.execute(TEST_INSTANCE_ID, old_events, new_events)
451+
actions = result.actions
452+
assert len(actions) == 1
453+
assert actions[0].HasField("createTimer")
454+
assert actions[0].createTimer.fireAt.ToDatetime() == expected_fire_at
455+
assert actions[0].id == 2
456+
457+
# Simulate the timer firing and a second failure — retry timer should be at 2 seconds (backoff)
458+
current_timestamp = expected_fire_at
459+
old_events = old_events + new_events
460+
new_events = [
461+
helpers.new_orchestrator_started_event(current_timestamp),
462+
helpers.new_timer_fired_event(2, current_timestamp)]
463+
executor = worker._OrchestrationExecutor(registry, TEST_LOGGER)
464+
result = executor.execute(TEST_INSTANCE_ID, old_events, new_events)
465+
actions = result.actions
466+
assert len(actions) == 2
467+
assert actions[1].HasField("scheduleTask")
468+
assert actions[1].id == 1
469+
470+
expected_fire_at = current_timestamp + timedelta(seconds=2)
471+
old_events = old_events + new_events
472+
new_events = [
473+
helpers.new_orchestrator_started_event(current_timestamp),
474+
helpers.new_task_failed_event(1, ValueError("Kah-BOOOOM!!!"))]
475+
executor = worker._OrchestrationExecutor(registry, TEST_LOGGER)
476+
result = executor.execute(TEST_INSTANCE_ID, old_events, new_events)
477+
actions = result.actions
478+
assert len(actions) == 3
479+
assert actions[2].HasField("createTimer")
480+
assert actions[2].createTimer.fireAt.ToDatetime() == expected_fire_at
481+
assert actions[2].id == 3
482+
483+
# Simulate the timer firing and a third failure — should now fail (max_number_of_attempts=3)
484+
current_timestamp = expected_fire_at
485+
old_events = old_events + new_events
486+
new_events = [
487+
helpers.new_orchestrator_started_event(current_timestamp),
488+
helpers.new_timer_fired_event(3, current_timestamp)]
489+
executor = worker._OrchestrationExecutor(registry, TEST_LOGGER)
490+
result = executor.execute(TEST_INSTANCE_ID, old_events, new_events)
491+
actions = result.actions
492+
assert len(actions) == 3
493+
assert actions[1].HasField("scheduleTask")
494+
495+
old_events = old_events + new_events
496+
new_events = [
497+
helpers.new_orchestrator_started_event(current_timestamp),
498+
helpers.new_task_failed_event(1, ValueError("Kah-BOOOOM!!!"))]
499+
executor = worker._OrchestrationExecutor(registry, TEST_LOGGER)
500+
result = executor.execute(TEST_INSTANCE_ID, old_events, new_events)
501+
actions = result.actions
502+
assert len(actions) == 4
503+
assert actions[-1].completeOrchestration.failureDetails.errorMessage.__contains__("Activity task #1 failed: Kah-BOOOOM!!!")
504+
505+
506+
def test_activity_retry_with_default_backoff():
507+
"""Tests retry with default backoff_coefficient (1.0) and no max_retry_interval.
508+
509+
Verifies that retry delays remain constant when backoff_coefficient defaults to 1.0.
510+
"""
511+
512+
def dummy_activity(ctx, _):
513+
raise ValueError("Fail!")
514+
515+
def orchestrator(ctx: task.OrchestrationContext, _):
516+
result = yield ctx.call_activity(
517+
dummy_activity,
518+
retry_policy=task.RetryPolicy(
519+
first_retry_interval=timedelta(seconds=5),
520+
max_number_of_attempts=3))
521+
return result
522+
523+
registry = worker._Registry()
524+
name = registry.add_orchestrator(orchestrator)
525+
526+
current_timestamp = datetime.utcnow()
527+
528+
# First failure — retry timer at 5 seconds (default backoff=1.0, so 5 * 1^0 = 5)
529+
old_events = [
530+
helpers.new_orchestrator_started_event(timestamp=current_timestamp),
531+
helpers.new_execution_started_event(name, TEST_INSTANCE_ID, encoded_input=None),
532+
helpers.new_task_scheduled_event(1, task.get_name(dummy_activity))]
533+
expected_fire_at = current_timestamp + timedelta(seconds=5)
534+
535+
new_events = [
536+
helpers.new_orchestrator_started_event(timestamp=current_timestamp),
537+
helpers.new_task_failed_event(1, ValueError("Fail!"))]
538+
executor = worker._OrchestrationExecutor(registry, TEST_LOGGER)
539+
result = executor.execute(TEST_INSTANCE_ID, old_events, new_events)
540+
actions = result.actions
541+
assert len(actions) == 1
542+
assert actions[0].HasField("createTimer")
543+
assert actions[0].createTimer.fireAt.ToDatetime() == expected_fire_at
544+
545+
# Second failure — retry timer still at 5 seconds (5 * 1^1 = 5, no backoff growth)
546+
current_timestamp = expected_fire_at
547+
old_events = old_events + new_events
548+
new_events = [
549+
helpers.new_orchestrator_started_event(current_timestamp),
550+
helpers.new_timer_fired_event(2, current_timestamp)]
551+
executor = worker._OrchestrationExecutor(registry, TEST_LOGGER)
552+
result = executor.execute(TEST_INSTANCE_ID, old_events, new_events)
553+
554+
expected_fire_at = current_timestamp + timedelta(seconds=5)
555+
old_events = old_events + new_events
556+
new_events = [
557+
helpers.new_orchestrator_started_event(current_timestamp),
558+
helpers.new_task_failed_event(1, ValueError("Fail!"))]
559+
executor = worker._OrchestrationExecutor(registry, TEST_LOGGER)
560+
result = executor.execute(TEST_INSTANCE_ID, old_events, new_events)
561+
actions = result.actions
562+
assert len(actions) == 3
563+
assert actions[2].HasField("createTimer")
564+
assert actions[2].createTimer.fireAt.ToDatetime() == expected_fire_at
565+
566+
413567
def test_nondeterminism_expected_timer():
414568
"""Tests the non-determinism detection logic when call_timer is expected but some other method (call_activity) is called instead"""
415569
def dummy_activity(ctx, _):

0 commit comments

Comments
 (0)