Skip to content

Commit ec7d221

Browse files
committed
fix(tui): prevent false 'task crashed' errors due to race condition
The check_crashed_tasks() function was incorrectly detecting normal task completions as crashes. This happened because: 1. A task sends ToolEvent::Completed via the channel 2. The task terminates (is_finished() == true) 3. Before the event is processed, check_crashed_tasks() runs 4. It sees is_finished() == true and assumes it's a crash 5. It sends ANOTHER ToolEvent::Failed The fix properly awaits the JoinHandle to check if the task actually panicked or was cancelled. If handle.await returns Ok(()), we know the task completed normally and its event is just pending in the channel. This fixes the 'Subagent task terminated unexpectedly (possible panic or cancellation)' error that appeared for tools like ListSubagents, Glob, LS.
1 parent 8ba6676 commit ec7d221

File tree

1 file changed

+41
-41
lines changed

1 file changed

+41
-41
lines changed

cortex-tui/src/runner/event_loop.rs

Lines changed: 41 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -3473,59 +3473,59 @@ impl EventLoop {
34733473
/// Checks for crashed background tool tasks (panics or cancelled).
34743474
///
34753475
/// This method is called periodically to detect subagent tasks that have
3476-
/// terminated unexpectedly without sending a completion or failure event.
3476+
/// terminated unexpectedly due to a panic or cancellation.
34773477
/// When a crashed task is detected, it sends a ToolEvent::Failed to ensure
34783478
/// the main agent always receives a response.
3479+
///
3480+
/// IMPORTANT: We only send a Failed event if the task actually panicked or
3481+
/// was cancelled. If the task completed normally (Ok(())), we do NOT send
3482+
/// a failure event because the task already sent its own Completed/Failed
3483+
/// event through the channel - it just hasn't been processed yet due to
3484+
/// the async nature of the event loop.
34793485
async fn check_crashed_tasks(&mut self) {
3480-
// Collect crashed task IDs and their errors
3481-
let mut crashed_tasks: Vec<(String, String)> = Vec::new();
3482-
3483-
// Check all running tool tasks
3484-
for (id, handle) in self.running_tool_tasks.iter() {
3485-
if handle.is_finished() {
3486-
// Task finished - check if it panicked by trying to get the result
3487-
// Note: We can't actually await the handle here since we don't own it,
3488-
// but is_finished() alone tells us the task ended unexpectedly
3489-
// (if it ended normally, it would have sent Completed/Failed events
3490-
// which would have removed it from running_tool_tasks)
3491-
crashed_tasks.push((
3492-
id.clone(),
3493-
"Subagent task terminated unexpectedly (possible panic or cancellation)"
3494-
.to_string(),
3495-
));
3496-
}
3497-
}
3498-
3499-
// Process crashed tasks
3500-
for (id, error) in crashed_tasks {
3501-
tracing::error!("Detected crashed subagent task: {} - {}", id, error);
3486+
// Collect finished task IDs
3487+
let finished_task_ids: Vec<String> = self
3488+
.running_tool_tasks
3489+
.iter()
3490+
.filter(|(_, handle)| handle.is_finished())
3491+
.map(|(id, _)| id.clone())
3492+
.collect();
35023493

3503-
// Remove from running tasks
3494+
// Process finished tasks - check if they actually crashed
3495+
for id in finished_task_ids {
35043496
if let Some(handle) = self.running_tool_tasks.remove(&id) {
3505-
// Try to get the panic message if possible
3506-
let error_msg = match handle.await {
3507-
Ok(()) => error, // Task completed but didn't send event - shouldn't happen
3497+
// Await the handle to check if it panicked or was cancelled
3498+
match handle.await {
3499+
Ok(()) => {
3500+
// Task completed normally - it sent its own Completed/Failed event
3501+
// through the channel. Don't send another Failed event.
3502+
// The event is just in transit and will be processed soon.
3503+
tracing::debug!("Task {} finished normally, event pending in channel", id);
3504+
}
35083505
Err(join_error) => {
3509-
if join_error.is_panic() {
3506+
// Task actually crashed - send a failure event
3507+
let error_msg = if join_error.is_panic() {
35103508
format!("Subagent panicked: {:?}", join_error.into_panic())
35113509
} else if join_error.is_cancelled() {
35123510
"Subagent task was cancelled".to_string()
35133511
} else {
35143512
format!("Subagent task failed: {}", join_error)
3515-
}
3516-
}
3517-
};
3513+
};
35183514

3519-
// Send failure event through the tool event channel
3520-
let _ = self
3521-
.tool_event_tx
3522-
.send(ToolEvent::Failed {
3523-
id: id.clone(),
3524-
name: "Task".to_string(),
3525-
error: error_msg,
3526-
duration: std::time::Duration::from_secs(0),
3527-
})
3528-
.await;
3515+
tracing::error!("Detected crashed task: {} - {}", id, error_msg);
3516+
3517+
// Send failure event through the tool event channel
3518+
let _ = self
3519+
.tool_event_tx
3520+
.send(ToolEvent::Failed {
3521+
id: id.clone(),
3522+
name: "Task".to_string(),
3523+
error: error_msg,
3524+
duration: std::time::Duration::from_secs(0),
3525+
})
3526+
.await;
3527+
}
3528+
}
35293529
}
35303530
}
35313531
}

0 commit comments

Comments
 (0)