From cbb63568a524c6c571bd2f73460ed764b99ed4d0 Mon Sep 17 00:00:00 2001
From: Evan Mattson <evan.mattson@microsoft.com>
Date: Tue, 3 Mar 2026 17:34:46 +0900
Subject: [PATCH 1/2] Fix duplicate tool_calls messages in HandoffBuilder with
 tool approval (#4411)

HandoffAgentExecutor._clone_chat_agent() created cloned agents with empty
context_providers, causing Agent._prepare_run_context() to auto-inject an
InMemoryHistoryProvider. This provider independently loaded stored messages
alongside the full conversation replay from _full_conversation, producing
duplicate assistant tool_calls messages without matching tool responses.
The OpenAI Chat Completions API rejects this with a 400 error.

Fix: Suppress history providers in cloned handoff agents by replacing them
with a disabled InMemoryHistoryProvider (load_messages=False, store_inputs=False,
store_outputs=False). This prevents auto-injection while preserving non-history
context providers.

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
---
 .../_handoff.py                               | 13 ++-
 .../orchestrations/tests/test_handoff.py      | 95 +++++++++++++++++++
 2 files changed, 106 insertions(+), 2 deletions(-)

diff --git a/python/packages/orchestrations/agent_framework_orchestrations/_handoff.py b/python/packages/orchestrations/agent_framework_orchestrations/_handoff.py
index d2ff5af959..9e366cd62f 100644
--- a/python/packages/orchestrations/agent_framework_orchestrations/_handoff.py
+++ b/python/packages/orchestrations/agent_framework_orchestrations/_handoff.py
@@ -39,7 +39,7 @@
 
 from agent_framework import Agent, SupportsAgentRun
 from agent_framework._middleware import FunctionInvocationContext, FunctionMiddleware
-from agent_framework._sessions import AgentSession
+from agent_framework._sessions import AgentSession, BaseContextProvider, BaseHistoryProvider, InMemoryHistoryProvider
 from agent_framework._tools import FunctionTool, tool
 from agent_framework._types import AgentResponse, AgentResponseUpdate, Content, Message
 from agent_framework._workflows._agent_executor import AgentExecutor, AgentExecutorRequest, AgentExecutorResponse
@@ -390,12 +390,21 @@ def _clone_chat_agent(self, agent: Agent) -> Agent:
             "user": options.get("user"),
         }
 
+        # Handoff workflows manage full conversation state via _full_conversation.
+        # Suppress history providers to prevent duplicate messages on approval resume.
+        context_providers: list[BaseContextProvider] = [
+            p for p in agent.context_providers if not isinstance(p, BaseHistoryProvider)
+        ]
+        context_providers.append(
+            InMemoryHistoryProvider(load_messages=False, store_inputs=False, store_outputs=False)
+        )
+
         return Agent(
             client=agent.client,
             id=agent.id,
             name=agent.name,
             description=agent.description,
-            context_providers=agent.context_providers,
+            context_providers=context_providers,
             middleware=middleware,
             default_options=cloned_options,  # type: ignore[arg-type]
         )
diff --git a/python/packages/orchestrations/tests/test_handoff.py b/python/packages/orchestrations/tests/test_handoff.py
index e0d94355b6..a064f5c935 100644
--- a/python/packages/orchestrations/tests/test_handoff.py
+++ b/python/packages/orchestrations/tests/test_handoff.py
@@ -472,7 +472,102 @@ async def _get() -> ChatResponse:
     assert client.resume_validated is True
 
 
+async def test_handoff_tool_approval_does_not_duplicate_tool_calls_messages() -> None:
+    """InMemoryHistoryProvider must not cause duplicate tool_calls on approval resume (#4411)."""
+
+    @tool(name="submit_refund", approval_mode="always_require")
+    def submit_refund() -> str:
+        return "ok"
+
+    class DuplicateDetectingClient(ChatMiddlewareLayer[Any], FunctionInvocationLayer[Any], BaseChatClient[Any]):
+        def __init__(self) -> None:
+            ChatMiddlewareLayer.__init__(self)
+            FunctionInvocationLayer.__init__(self)
+            BaseChatClient.__init__(self)
+            self._call_index = 0
+            self.resume_validated = False
+
+        def _inner_get_response(
+            self,
+            *,
+            messages: Sequence[Message],
+            stream: bool,
+            options: Mapping[str, Any],
+            **kwargs: Any,
+        ) -> Awaitable[ChatResponse] | ResponseStream[ChatResponseUpdate, ChatResponse]:
+            del options
+            del kwargs
+
+            if self._call_index == 0:
+                contents = [
+                    Content.from_function_call(
+                        call_id="refund-call-1",
+                        name="submit_refund",
+                        arguments={},
+                    )
+                ]
+            else:
+                # Each assistant message with tool_calls must have a matching tool response.
+                # Duplicate tool_calls without responses trigger an OpenAI 400 error.
+                tool_call_ids: list[str] = []
+                tool_result_ids: set[str] = set()
+                for msg in messages:
+                    for content in msg.contents:
+                        if content.type == "function_call" and content.call_id:
+                            tool_call_ids.append(content.call_id)
+                        elif content.type == "function_result" and content.call_id:
+                            tool_result_ids.add(content.call_id)
+                unmatched = [cid for cid in tool_call_ids if cid not in tool_result_ids]
+                if unmatched:
+                    raise AssertionError(
+                        f"Assistant tool_calls without matching tool response: {unmatched}. "
+                        "This would cause a 400 error from the OpenAI Chat Completions API."
+                    )
+                self.resume_validated = True
+                contents = [Content.from_text(text="Refund submitted.")]
+
+            self._call_index += 1
+
+            if stream:
+
+                async def _stream() -> AsyncIterable[ChatResponseUpdate]:
+                    yield ChatResponseUpdate(contents=contents, role="assistant", finish_reason="stop")
+
+                return ResponseStream(_stream(), finalizer=lambda updates: ChatResponse.from_updates(updates))
+
+            async def _get() -> ChatResponse:
+                return ChatResponse(
+                    messages=[Message(role="assistant", contents=contents)],
+                    response_id="dup-detect",
+                )
+
+            return _get()
+
+    client = DuplicateDetectingClient()
+    agent = Agent(
+        id="refund_agent",
+        name="refund_agent",
+        client=client,
+        tools=[submit_refund],
+    )
+    workflow = (
+        HandoffBuilder(participants=[agent], termination_condition=lambda _: False).with_start_agent(agent).build()
+    )
+
+    first_events = await _drain(workflow.run("Refund order 123", stream=True))
+    approval_requests = [
+        event for event in first_events if event.type == "request_info" and isinstance(event.data, Content)
+    ]
+    assert approval_requests
+    first_request = approval_requests[0]
+
+    approval_response = first_request.data.to_function_approval_response(True)
+    await _drain(workflow.run(stream=True, responses={first_request.request_id: approval_response}))
+
+    assert client.resume_validated is True
+
+
 async def test_handoff_replay_serializes_handoff_function_results() -> None:
     """Returning to the same agent must not replay dict tool outputs."""
 
     class ReplaySafeHandoffClient(ChatMiddlewareLayer[Any], FunctionInvocationLayer[Any], BaseChatClient[Any]):

From eeee66596febc320d31b784b490c91b2186f2624 Mon Sep 17 00:00:00 2001
From: Evan Mattson <evan.mattson@microsoft.com>
Date: Tue, 3 Mar 2026 17:35:04 +0900
Subject: [PATCH 2/2] Apply pre-commit auto-fixes

---
 .../orchestrations/agent_framework_orchestrations/_handoff.py | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/python/packages/orchestrations/agent_framework_orchestrations/_handoff.py b/python/packages/orchestrations/agent_framework_orchestrations/_handoff.py
index 9e366cd62f..bc2ccc47b7 100644
--- a/python/packages/orchestrations/agent_framework_orchestrations/_handoff.py
+++ b/python/packages/orchestrations/agent_framework_orchestrations/_handoff.py
@@ -395,9 +395,7 @@ def _clone_chat_agent(self, agent: Agent) -> Agent:
         context_providers: list[BaseContextProvider] = [
             p for p in agent.context_providers if not isinstance(p, BaseHistoryProvider)
         ]
-        context_providers.append(
-            InMemoryHistoryProvider(load_messages=False, store_inputs=False, store_outputs=False)
-        )
+        context_providers.append(InMemoryHistoryProvider(load_messages=False, store_inputs=False, store_outputs=False))
 
         return Agent(
             client=agent.client,