diff --git a/cortex-tui/src/runner/event_loop.rs b/cortex-tui/src/runner/event_loop.rs index 9ae96801..49527c0f 100644 --- a/cortex-tui/src/runner/event_loop.rs +++ b/cortex-tui/src/runner/event_loop.rs @@ -3473,59 +3473,59 @@ impl EventLoop { /// Checks for crashed background tool tasks (panics or cancelled). /// /// This method is called periodically to detect subagent tasks that have - /// terminated unexpectedly without sending a completion or failure event. + /// terminated unexpectedly due to a panic or cancellation. /// When a crashed task is detected, it sends a ToolEvent::Failed to ensure /// the main agent always receives a response. + /// + /// IMPORTANT: We only send a Failed event if the task actually panicked or + /// was cancelled. If the task completed normally (Ok(())), we do NOT send + /// a failure event because the task already sent its own Completed/Failed + /// event through the channel - it just hasn't been processed yet due to + /// the async nature of the event loop. async fn check_crashed_tasks(&mut self) { - // Collect crashed task IDs and their errors - let mut crashed_tasks: Vec<(String, String)> = Vec::new(); - - // Check all running tool tasks - for (id, handle) in self.running_tool_tasks.iter() { - if handle.is_finished() { - // Task finished - check if it panicked by trying to get the result - // Note: We can't actually await the handle here since we don't own it, - // but is_finished() alone tells us the task ended unexpectedly - // (if it ended normally, it would have sent Completed/Failed events - // which would have removed it from running_tool_tasks) - crashed_tasks.push(( - id.clone(), - "Subagent task terminated unexpectedly (possible panic or cancellation)" - .to_string(), - )); - } - } - - // Process crashed tasks - for (id, error) in crashed_tasks { - tracing::error!("Detected crashed subagent task: {} - {}", id, error); + // Collect finished task IDs + let finished_task_ids: Vec = self + .running_tool_tasks + .iter() + .filter(|(_, handle)| handle.is_finished()) + .map(|(id, _)| id.clone()) + .collect(); - // Remove from running tasks + // Process finished tasks - check if they actually crashed + for id in finished_task_ids { if let Some(handle) = self.running_tool_tasks.remove(&id) { - // Try to get the panic message if possible - let error_msg = match handle.await { - Ok(()) => error, // Task completed but didn't send event - shouldn't happen + // Await the handle to check if it panicked or was cancelled + match handle.await { + Ok(()) => { + // Task completed normally - it sent its own Completed/Failed event + // through the channel. Don't send another Failed event. + // The event is just in transit and will be processed soon. + tracing::debug!("Task {} finished normally, event pending in channel", id); + } Err(join_error) => { - if join_error.is_panic() { + // Task actually crashed - send a failure event + let error_msg = if join_error.is_panic() { format!("Subagent panicked: {:?}", join_error.into_panic()) } else if join_error.is_cancelled() { "Subagent task was cancelled".to_string() } else { format!("Subagent task failed: {}", join_error) - } - } - }; + }; - // Send failure event through the tool event channel - let _ = self - .tool_event_tx - .send(ToolEvent::Failed { - id: id.clone(), - name: "Task".to_string(), - error: error_msg, - duration: std::time::Duration::from_secs(0), - }) - .await; + tracing::error!("Detected crashed task: {} - {}", id, error_msg); + + // Send failure event through the tool event channel + let _ = self + .tool_event_tx + .send(ToolEvent::Failed { + id: id.clone(), + name: "Task".to_string(), + error: error_msg, + duration: std::time::Duration::from_secs(0), + }) + .await; + } + } } } }