Skip to content

Commit 0ede985

Browse files
torosentCopilot
andcommitted
fix: span durations for timers, client spans, and trace hierarchy
Address PR #266 review feedback (comment #3993852725): 1. Timer spans now have proper duration from creation to fired time, using TimerCreated event timestamp as setStartTimestamp(). 2. Activity/sub-orchestration client spans now have proper duration from scheduling to completion, created retroactively at completion time with TaskScheduled event timestamp as setStartTimestamp(). 3. Removed instantaneous client span creation at scheduling time; propagate orchestration's parentTraceContext directly instead. Note: Java OTel doesn't support SetSpanId() like .NET, so child spans are siblings under create_orchestration rather than nested under the orchestration span. All 15 spans have meaningful durations. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 11ea4f0 commit 0ede985

File tree

7 files changed

+250
-53
lines changed

7 files changed

+250
-53
lines changed

client/src/main/java/com/microsoft/durabletask/DurableTaskGrpcWorker.java

Lines changed: 7 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -196,18 +196,20 @@ public void startAndBlock() {
196196
try {
197197
taskOrchestratorResult = taskOrchestrationExecutor.execute(
198198
orchestratorRequest.getPastEventsList(),
199-
orchestratorRequest.getNewEventsList());
199+
orchestratorRequest.getNewEventsList(),
200+
null);
200201
} catch (Throwable e) {
201202
if (e instanceof Error) {
202203
throw (Error) e;
203204
}
204205
throw new RuntimeException(e);
205206
}
206207

207-
// Emit orchestration span only when the orchestration completes or is terminated.
208-
// Uses ExecutionStartedEvent timestamp as span start time so the span covers
209-
// the full orchestration lifecycle (from creation to completion),
210-
// matching the .NET SDK behavior.
208+
// Emit orchestration span only on completion/termination.
209+
// Uses ExecutionStartedEvent timestamp as span start time for full lifecycle coverage.
210+
// Note: Java OTel doesn't support SetSpanId(), so we can't make this span
211+
// the parent of child spans across dispatches like .NET does. Child spans
212+
// (activities, timers) are siblings under the create_orchestration span.
211213
boolean isCompleting = taskOrchestratorResult.getActions().stream()
212214
.anyMatch(a -> a.getOrchestratorActionTypeCase() == OrchestratorAction.OrchestratorActionTypeCase.COMPLETEORCHESTRATION
213215
|| a.getOrchestratorActionTypeCase() == OrchestratorAction.OrchestratorActionTypeCase.TERMINATEORCHESTRATION);
@@ -218,7 +220,6 @@ public void startAndBlock() {
218220
orchSpanAttrs.put(TracingHelper.ATTR_TASK_NAME, orchName);
219221
orchSpanAttrs.put(TracingHelper.ATTR_INSTANCE_ID, orchestratorRequest.getInstanceId());
220222

221-
// Use the ExecutionStartedEvent timestamp as the orchestration span start time
222223
Instant spanStartTime = null;
223224
if (startedHistoryEvent.hasTimestamp()) {
224225
spanStartTime = DataConverter.getInstantFromTimestamp(

client/src/main/java/com/microsoft/durabletask/OrchestrationRunner.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,8 @@ public TaskOrchestration create() {
181181
try {
182182
taskOrchestratorResult = taskOrchestrationExecutor.execute(
183183
orchestratorRequest.getPastEventsList(),
184-
orchestratorRequest.getNewEventsList());
184+
orchestratorRequest.getNewEventsList(),
185+
TracingHelper.getCurrentTraceContext(orchestrationSpan));
185186
} catch (Exception e) {
186187
orchestrationError = e;
187188
throw e instanceof RuntimeException ? (RuntimeException) e : new RuntimeException(e);

client/src/main/java/com/microsoft/durabletask/TaskOrchestrationExecutor.java

Lines changed: 135 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,11 @@ public TaskOrchestrationExecutor(
4545
this.versioningOptions = versioningOptions;
4646
}
4747

48-
public TaskOrchestratorResult execute(List<HistoryEvent> pastEvents, List<HistoryEvent> newEvents) {
49-
ContextImplTask context = new ContextImplTask(pastEvents, newEvents);
48+
public TaskOrchestratorResult execute(
49+
List<HistoryEvent> pastEvents,
50+
List<HistoryEvent> newEvents,
51+
@Nullable TraceContext orchestrationSpanContext) {
52+
ContextImplTask context = new ContextImplTask(pastEvents, newEvents, orchestrationSpanContext);
5053

5154
if (this.versioningOptions != null && this.versioningOptions.getDefaultVersion() != null) {
5255
// Set the default version for the orchestrator
@@ -109,8 +112,18 @@ private class ContextImplTask implements TaskOrchestrationContext {
109112
private Object customStatus;
110113
private TraceContext parentTraceContext;
111114

112-
public ContextImplTask(List<HistoryEvent> pastEvents, List<HistoryEvent> newEvents) {
115+
// Stores scheduling metadata (timestamp, name, parentTraceContext) for retroactive client spans
116+
private final HashMap<Integer, ScheduledTaskInfo> scheduledTaskInfoMap = new HashMap<>();
117+
// Stores timer creation timestamps for timer spans with duration
118+
private final HashMap<Integer, Instant> timerCreationTimes = new HashMap<>();
119+
120+
// Orchestration span context for proper parent-child relationships in traces
121+
private TraceContext orchestrationSpanContext;
122+
123+
public ContextImplTask(List<HistoryEvent> pastEvents, List<HistoryEvent> newEvents,
124+
@Nullable TraceContext orchestrationSpanContext) {
113125
this.historyEventPlayer = new OrchestrationHistoryIterator(pastEvents, newEvents);
126+
this.orchestrationSpanContext = orchestrationSpanContext;
114127
}
115128

116129
@Override
@@ -301,21 +314,13 @@ public <V> Task<V> callActivity(
301314
TaskFactory<V> taskFactory = () -> {
302315
int id = this.sequenceNumber++;
303316

304-
// Create a Client-kind span for scheduling (mirrors .NET paired Client+Server spans)
305-
// Only create during non-replay to avoid duplicate spans on orchestration replay
306-
if (this.parentTraceContext != null && !this.isReplaying) {
307-
TraceContext clientCtx = TracingHelper.createClientSpan(
308-
"activity:" + name,
309-
this.parentTraceContext,
310-
TracingHelper.TYPE_ACTIVITY,
311-
name,
312-
this.instanceId,
313-
id);
314-
if (clientCtx != null) {
315-
scheduleTaskBuilder.setParentTraceContext(clientCtx);
316-
}
317-
} else if (this.parentTraceContext != null) {
318-
scheduleTaskBuilder.setParentTraceContext(this.parentTraceContext);
317+
// Propagate the orchestration span's context so the Server span on the worker
318+
// is a child of the orchestration span. Falls back to parentTraceContext if no
319+
// orchestration span context is available.
320+
TraceContext propagatedCtx = this.orchestrationSpanContext != null
321+
? this.orchestrationSpanContext : this.parentTraceContext;
322+
if (propagatedCtx != null) {
323+
scheduleTaskBuilder.setParentTraceContext(propagatedCtx);
319324
}
320325

321326
this.pendingActions.put(id, OrchestratorAction.newBuilder()
@@ -441,21 +446,12 @@ public <V> Task<V> callSubOrchestrator(
441446
TaskFactory<V> taskFactory = () -> {
442447
int id = this.sequenceNumber++;
443448

444-
// Create a Client-kind span for scheduling (mirrors .NET paired Client+Server spans)
445-
// Only create during non-replay to avoid duplicate spans on orchestration replay
446-
if (this.parentTraceContext != null && !this.isReplaying) {
447-
TraceContext clientCtx = TracingHelper.createClientSpan(
448-
"orchestration:" + name,
449-
this.parentTraceContext,
450-
TracingHelper.TYPE_ORCHESTRATION,
451-
name,
452-
subInstanceId,
453-
id);
454-
if (clientCtx != null) {
455-
createSubOrchestrationActionBuilder.setParentTraceContext(clientCtx);
456-
}
457-
} else if (this.parentTraceContext != null) {
458-
createSubOrchestrationActionBuilder.setParentTraceContext(this.parentTraceContext);
449+
// Propagate the orchestration span's context so the Server span on the worker
450+
// is a child of the orchestration span.
451+
TraceContext propagatedCtx = this.orchestrationSpanContext != null
452+
? this.orchestrationSpanContext : this.parentTraceContext;
453+
if (propagatedCtx != null) {
454+
createSubOrchestrationActionBuilder.setParentTraceContext(propagatedCtx);
459455
}
460456

461457
this.pendingActions.put(id, OrchestratorAction.newBuilder()
@@ -551,6 +547,19 @@ private void handleTaskScheduled(HistoryEvent e) {
551547

552548
TaskScheduledEvent taskScheduled = e.getTaskScheduled();
553549

550+
// Store scheduling metadata for retroactive client span at completion time.
551+
// Use orchestrationSpanContext as parent so the client span is a child of the orchestration span.
552+
Instant scheduledTime = e.hasTimestamp()
553+
? DataConverter.getInstantFromTimestamp(e.getTimestamp())
554+
: null;
555+
TraceContext spanParent = this.orchestrationSpanContext != null
556+
? this.orchestrationSpanContext : this.parentTraceContext;
557+
this.scheduledTaskInfoMap.put(taskId, new ScheduledTaskInfo(
558+
taskScheduled.getName(),
559+
scheduledTime,
560+
spanParent,
561+
TracingHelper.TYPE_ACTIVITY));
562+
554563
// The history shows that this orchestrator created a durable task in a previous execution.
555564
// We can therefore remove it from the map of pending actions. If we can't find the pending
556565
// action, then we assume a non-deterministic code violation in the orchestrator.
@@ -586,6 +595,19 @@ private void handleTaskCompleted(HistoryEvent e) {
586595
taskId,
587596
rawResult != null ? rawResult : "(null)"));
588597

598+
// Emit a retroactive Client span covering scheduling-to-completion duration.
599+
// Matches .NET SDK's EmitTraceActivityForTaskCompleted pattern.
600+
ScheduledTaskInfo info = this.scheduledTaskInfoMap.remove(taskId);
601+
if (info != null) {
602+
TracingHelper.emitRetroactiveClientSpan(
603+
info.spanType + ":" + info.taskName,
604+
info.parentTraceContext,
605+
info.spanType,
606+
info.taskName,
607+
this.instanceId,
608+
taskId,
609+
info.scheduledTime);
610+
}
589611
}
590612
CompletableTask task = record.getTask();
591613
try {
@@ -609,6 +631,19 @@ private void handleTaskFailed(HistoryEvent e) {
609631

610632
if (!this.isReplaying) {
611633
// TODO: Log task failure, including the number of bytes in the result
634+
635+
// Emit a retroactive Client span covering scheduling-to-failure duration.
636+
ScheduledTaskInfo info = this.scheduledTaskInfoMap.remove(taskId);
637+
if (info != null) {
638+
TracingHelper.emitRetroactiveClientSpan(
639+
info.spanType + ":" + info.taskName,
640+
info.parentTraceContext,
641+
info.spanType,
642+
info.taskName,
643+
this.instanceId,
644+
taskId,
645+
info.scheduledTime);
646+
}
612647
}
613648

614649
CompletableTask<?> task = record.getTask();
@@ -713,6 +748,11 @@ private void handleTimerCreated(HistoryEvent e) {
713748

714749
TimerCreatedEvent timerCreatedEvent = e.getTimerCreated();
715750

751+
// Store the timer creation timestamp for the timer span duration
752+
if (e.hasTimestamp()) {
753+
this.timerCreationTimes.put(timerEventId, DataConverter.getInstantFromTimestamp(e.getTimestamp()));
754+
}
755+
716756
// The history shows that this orchestrator created a durable timer in a previous execution.
717757
// We can therefore remove it from the map of pending actions. If we can't find the pending
718758
// action, then we assume a non-deterministic code violation in the orchestrator.
@@ -736,16 +776,20 @@ public void handleTimerFired(HistoryEvent e) {
736776
}
737777

738778
if (!this.isReplaying) {
739-
// Emit a timer span matching .NET SDK's EmitTraceActivityForTimer
779+
// Emit a timer span with duration from creation to firing time.
780+
// Matches .NET SDK's EmitTraceActivityForTimer which spans from startTime to now.
740781
String fireAt = timerFiredEvent.hasFireAt()
741782
? DataConverter.getInstantFromTimestamp(timerFiredEvent.getFireAt()).toString()
742783
: null;
784+
Instant creationTime = this.timerCreationTimes.remove(timerEventId);
743785
TracingHelper.emitTimerSpan(
744786
this.getName(),
745787
this.instanceId,
746788
timerEventId,
747789
fireAt,
748-
this.parentTraceContext);
790+
this.orchestrationSpanContext != null
791+
? this.orchestrationSpanContext : this.parentTraceContext,
792+
creationTime);
749793
}
750794

751795
CompletableTask<?> task = record.getTask();
@@ -755,6 +799,19 @@ public void handleTimerFired(HistoryEvent e) {
755799
private void handleSubOrchestrationCreated(HistoryEvent e) {
756800
int taskId = e.getEventId();
757801
SubOrchestrationInstanceCreatedEvent subOrchestrationInstanceCreated = e.getSubOrchestrationInstanceCreated();
802+
803+
// Store scheduling metadata for retroactive client span at completion time
804+
Instant scheduledTime = e.hasTimestamp()
805+
? DataConverter.getInstantFromTimestamp(e.getTimestamp())
806+
: null;
807+
TraceContext spanParent = this.orchestrationSpanContext != null
808+
? this.orchestrationSpanContext : this.parentTraceContext;
809+
this.scheduledTaskInfoMap.put(taskId, new ScheduledTaskInfo(
810+
subOrchestrationInstanceCreated.getName(),
811+
scheduledTime,
812+
spanParent,
813+
TracingHelper.TYPE_ORCHESTRATION));
814+
758815
OrchestratorAction taskAction = this.pendingActions.remove(taskId);
759816
if (taskAction == null) {
760817
String message = String.format(
@@ -785,6 +842,18 @@ private void handleSubOrchestrationCompleted(HistoryEvent e) {
785842
taskId,
786843
rawResult != null ? rawResult : "(null)"));
787844

845+
// Emit a retroactive Client span covering scheduling-to-completion duration.
846+
ScheduledTaskInfo info = this.scheduledTaskInfoMap.remove(taskId);
847+
if (info != null) {
848+
TracingHelper.emitRetroactiveClientSpan(
849+
info.spanType + ":" + info.taskName,
850+
info.parentTraceContext,
851+
info.spanType,
852+
info.taskName,
853+
this.instanceId,
854+
taskId,
855+
info.scheduledTime);
856+
}
788857
}
789858
CompletableTask task = record.getTask();
790859
try {
@@ -808,6 +877,19 @@ private void handleSubOrchestrationFailed(HistoryEvent e){
808877

809878
if (!this.isReplaying) {
810879
// TODO: Log task failure, including the number of bytes in the result
880+
881+
// Emit a retroactive Client span covering scheduling-to-failure duration.
882+
ScheduledTaskInfo info = this.scheduledTaskInfoMap.remove(taskId);
883+
if (info != null) {
884+
TracingHelper.emitRetroactiveClientSpan(
885+
info.spanType + ":" + info.taskName,
886+
info.parentTraceContext,
887+
info.spanType,
888+
info.taskName,
889+
this.instanceId,
890+
taskId,
891+
info.scheduledTime);
892+
}
811893
}
812894

813895
CompletableTask<?> task = record.getTask();
@@ -1024,6 +1106,24 @@ public Class<V> getDataType() {
10241106
}
10251107
}
10261108

1109+
/**
1110+
* Stores scheduling metadata for retroactive client span creation at completion time.
1111+
* Matches .NET SDK pattern where client spans are emitted with the full scheduling-to-completion duration.
1112+
*/
1113+
private class ScheduledTaskInfo {
1114+
final String taskName;
1115+
final Instant scheduledTime;
1116+
final TraceContext parentTraceContext;
1117+
final String spanType; // "activity" or "orchestration"
1118+
1119+
ScheduledTaskInfo(String taskName, Instant scheduledTime, TraceContext parentTraceContext, String spanType) {
1120+
this.taskName = taskName;
1121+
this.scheduledTime = scheduledTime;
1122+
this.parentTraceContext = parentTraceContext;
1123+
this.spanType = spanType;
1124+
}
1125+
}
1126+
10271127
private class OrchestrationHistoryIterator {
10281128
private final List<HistoryEvent> pastEvents;
10291129
private final List<HistoryEvent> newEvents;

0 commit comments

Comments
 (0)