From ec7d22105c466d225cd114b0952ae4da968d70eb Mon Sep 17 00:00:00 2001 From: Droid Agent Date: Tue, 27 Jan 2026 15:18:02 +0000 Subject: [PATCH] fix(tui): prevent false 'task crashed' errors due to race condition The check_crashed_tasks() function was incorrectly detecting normal task completions as crashes. This happened because: 1. A task sends ToolEvent::Completed via the channel 2. The task terminates (is_finished() == true) 3. Before the event is processed, check_crashed_tasks() runs 4. It sees is_finished() == true and assumes it's a crash 5. It sends ANOTHER ToolEvent::Failed The fix properly awaits the JoinHandle to check if the task actually panicked or was cancelled. If handle.await returns Ok(()), we know the task completed normally and its event is just pending in the channel. This fixes the 'Subagent task terminated unexpectedly (possible panic or cancellation)' error that appeared for tools like ListSubagents, Glob, LS. --- cortex-tui/src/runner/event_loop.rs | 82 ++++++++++++++--------------- 1 file changed, 41 insertions(+), 41 deletions(-) diff --git a/cortex-tui/src/runner/event_loop.rs b/cortex-tui/src/runner/event_loop.rs index 9ae96801..49527c0f 100644 --- a/cortex-tui/src/runner/event_loop.rs +++ b/cortex-tui/src/runner/event_loop.rs @@ -3473,59 +3473,59 @@ impl EventLoop { /// Checks for crashed background tool tasks (panics or cancelled). /// /// This method is called periodically to detect subagent tasks that have - /// terminated unexpectedly without sending a completion or failure event. + /// terminated unexpectedly due to a panic or cancellation. /// When a crashed task is detected, it sends a ToolEvent::Failed to ensure /// the main agent always receives a response. + /// + /// IMPORTANT: We only send a Failed event if the task actually panicked or + /// was cancelled. If the task completed normally (Ok(())), we do NOT send + /// a failure event because the task already sent its own Completed/Failed + /// event through the channel - it just hasn't been processed yet due to + /// the async nature of the event loop. async fn check_crashed_tasks(&mut self) { - // Collect crashed task IDs and their errors - let mut crashed_tasks: Vec<(String, String)> = Vec::new(); - - // Check all running tool tasks - for (id, handle) in self.running_tool_tasks.iter() { - if handle.is_finished() { - // Task finished - check if it panicked by trying to get the result - // Note: We can't actually await the handle here since we don't own it, - // but is_finished() alone tells us the task ended unexpectedly - // (if it ended normally, it would have sent Completed/Failed events - // which would have removed it from running_tool_tasks) - crashed_tasks.push(( - id.clone(), - "Subagent task terminated unexpectedly (possible panic or cancellation)" - .to_string(), - )); - } - } - - // Process crashed tasks - for (id, error) in crashed_tasks { - tracing::error!("Detected crashed subagent task: {} - {}", id, error); + // Collect finished task IDs + let finished_task_ids: Vec = self + .running_tool_tasks + .iter() + .filter(|(_, handle)| handle.is_finished()) + .map(|(id, _)| id.clone()) + .collect(); - // Remove from running tasks + // Process finished tasks - check if they actually crashed + for id in finished_task_ids { if let Some(handle) = self.running_tool_tasks.remove(&id) { - // Try to get the panic message if possible - let error_msg = match handle.await { - Ok(()) => error, // Task completed but didn't send event - shouldn't happen + // Await the handle to check if it panicked or was cancelled + match handle.await { + Ok(()) => { + // Task completed normally - it sent its own Completed/Failed event + // through the channel. Don't send another Failed event. + // The event is just in transit and will be processed soon. + tracing::debug!("Task {} finished normally, event pending in channel", id); + } Err(join_error) => { - if join_error.is_panic() { + // Task actually crashed - send a failure event + let error_msg = if join_error.is_panic() { format!("Subagent panicked: {:?}", join_error.into_panic()) } else if join_error.is_cancelled() { "Subagent task was cancelled".to_string() } else { format!("Subagent task failed: {}", join_error) - } - } - }; + }; - // Send failure event through the tool event channel - let _ = self - .tool_event_tx - .send(ToolEvent::Failed { - id: id.clone(), - name: "Task".to_string(), - error: error_msg, - duration: std::time::Duration::from_secs(0), - }) - .await; + tracing::error!("Detected crashed task: {} - {}", id, error_msg); + + // Send failure event through the tool event channel + let _ = self + .tool_event_tx + .send(ToolEvent::Failed { + id: id.clone(), + name: "Task".to_string(), + error: error_msg, + duration: std::time::Duration::from_secs(0), + }) + .await; + } + } } } }