Skip to content
77 changes: 77 additions & 0 deletions crates/openfang-runtime/src/agent_loop.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,27 @@ const MAX_CONTINUATIONS: u32 = 5;
/// Maximum message history size before auto-trimming to prevent context overflow.
const MAX_HISTORY_MESSAGES: usize = 20;

/// Extra guidance injected after failed tool calls to prevent fabricated follow-up actions.
const TOOL_ERROR_GUIDANCE: &str =
"[System: One or more tool calls failed. Failed tools did not produce usable data. Do NOT invent missing results, cite nonexistent search results, or pretend failed tools succeeded. If your next steps depend on a failed tool, either retry with a materially different approach or explain the failure to the user and stop. Do not write files, store memory, or take downstream actions based on failed tool outputs.]";

fn append_tool_error_guidance(tool_result_blocks: &mut Vec<ContentBlock>) {
let has_tool_error = tool_result_blocks.iter().any(|block| {
matches!(
block,
ContentBlock::ToolResult {
is_error: true,
..
}
)
});
if has_tool_error {
tool_result_blocks.push(ContentBlock::Text {
text: TOOL_ERROR_GUIDANCE.to_string(),
});
}
}

/// Strip a provider prefix from a model ID before sending to the API.
///
/// Many models are stored as `provider/org/model` (e.g. `openrouter/google/gemini-2.5-flash`)
Expand Down Expand Up @@ -690,6 +711,8 @@ pub async fn run_agent_loop(
});
}

append_tool_error_guidance(&mut tool_result_blocks);

// Detect approval denials and inject guidance to prevent infinite retry loops
let denial_count = tool_result_blocks.iter().filter(|b| {
matches!(b, ContentBlock::ToolResult { content, is_error: true, .. }
Expand Down Expand Up @@ -1630,6 +1653,8 @@ pub async fn run_agent_loop_streaming(
});
}

append_tool_error_guidance(&mut tool_result_blocks);

// Detect approval denials and inject guidance to prevent infinite retry loops
let denial_count = tool_result_blocks.iter().filter(|b| {
matches!(b, ContentBlock::ToolResult { content, is_error: true, .. }
Expand Down Expand Up @@ -2236,6 +2261,58 @@ mod tests {
);
}

#[tokio::test]
async fn test_tool_error_injects_no_fabrication_guidance() {
let memory = openfang_memory::MemorySubstrate::open_in_memory(0.01).unwrap();
let agent_id = openfang_types::agent::AgentId::new();
let mut session = openfang_memory::session::Session {
id: openfang_types::agent::SessionId::new(),
agent_id,
messages: Vec::new(),
context_window_tokens: 0,
label: None,
};
let manifest = test_manifest();
let driver: Arc<dyn LlmDriver> = Arc::new(EmptyAfterToolUseDriver::new());

run_agent_loop(
&manifest,
"Do something with tools",
&mut session,
&memory,
driver,
&[], // no tools registered — the tool call will fail, which is fine
None,
None,
None,
None,
None,
None,
None,
None, // on_phase
None, // media_engine
None, // tts_engine
None, // docker_config
None, // hooks
None, // context_window_tokens
None, // process_manager
)
.await
.expect("Loop should complete without error");

let guidance_seen = session.messages.iter().any(|msg| match &msg.content {
MessageContent::Blocks(blocks) => blocks.iter().any(|block| {
matches!(block, ContentBlock::Text { text } if text == TOOL_ERROR_GUIDANCE)
}),
_ => false,
});

assert!(
guidance_seen,
"Expected tool error guidance in session messages after failed tool call"
);
}

#[tokio::test]
async fn test_empty_response_max_tokens_returns_fallback() {
let memory = openfang_memory::MemorySubstrate::open_in_memory(0.01).unwrap();
Expand Down