From febe65dc3f26f30844f2cb878bc6771aee0e9bcb Mon Sep 17 00:00:00 2001
From: "L. Elaine Dazzio" <elaine.dazzio@gmail.com>
Date: Mon, 2 Mar 2026 17:41:02 -0500
Subject: [PATCH 1/3] fix(python): filter unsupported kwargs in
 OllamaChatClient._inner_get_response

Orchestration layers like HandoffBuilder inject kwargs such as
allow_multiple_tool_calls that ollama.AsyncClient.chat() does not
accept, causing a TypeError.

Filter these kwargs before forwarding to the Ollama API, matching
the behavior documented in OllamaChatOptions where these fields
are explicitly marked as unsupported.

Fixes #4402
---
 .../agent_framework_ollama/_chat_client.py    | 16 ++++-
 .../ollama/tests/test_ollama_chat_client.py   | 58 +++++++++++++++++++
 2 files changed, 72 insertions(+), 2 deletions(-)

diff --git a/python/packages/ollama/agent_framework_ollama/_chat_client.py b/python/packages/ollama/agent_framework_ollama/_chat_client.py
index cc7fc0c9a7..fc0072df28 100644
--- a/python/packages/ollama/agent_framework_ollama/_chat_client.py
+++ b/python/packages/ollama/agent_framework_ollama/_chat_client.py
@@ -268,6 +268,13 @@ class OllamaChatOptions(ChatOptions[ResponseModelT], Generic[ResponseModelT], to
 }
 """Maps ChatOptions keys to Ollama model option parameter names."""
 
+# Kwargs that may be injected by orchestration layers (e.g. HandoffBuilder)
+# but are not supported by ollama.AsyncClient.chat().  These are silently
+# stripped in _inner_get_response so they never reach the Ollama API.
+_UNSUPPORTED_CHAT_KWARGS: set[str] = {
+    "allow_multiple_tool_calls",
+}
+
 OllamaChatOptionsT = TypeVar("OllamaChatOptionsT", bound=TypedDict, default="OllamaChatOptions", covariant=True)  # type: ignore[valid-type]
 
 
@@ -351,6 +358,11 @@ def _inner_get_response(
         stream: bool = False,
         **kwargs: Any,
     ) -> Awaitable[ChatResponse] | ResponseStream[ChatResponseUpdate, ChatResponse]:
+        # Filter out kwargs that are not supported by ollama.AsyncClient.chat().
+        # Orchestration layers (e.g. HandoffBuilder) may inject kwargs like
+        # allow_multiple_tool_calls that the Ollama Python client doesn't accept.
+        filtered_kwargs = {k: v for k, v in kwargs.items() if k not in _UNSUPPORTED_CHAT_KWARGS}
+
         if stream:
             # Streaming mode
             async def _stream() -> AsyncIterable[ChatResponseUpdate]:
@@ -360,7 +372,7 @@ async def _stream() -> AsyncIterable[ChatResponseUpdate]:
                     response_object: AsyncIterable[OllamaChatResponse] = await self.client.chat(  # type: ignore[misc]
                         stream=True,
                         **options_dict,
-                        **kwargs,
+                        **filtered_kwargs,
                     )
                 except Exception as ex:
                     raise ChatClientException(f"Ollama streaming chat request failed : {ex}", ex) from ex
@@ -378,7 +390,7 @@ async def _get_response() -> ChatResponse:
                 response: OllamaChatResponse = await self.client.chat(  # type: ignore[misc]
                     stream=False,
                     **options_dict,
-                    **kwargs,
+                    **filtered_kwargs,
                 )
             except Exception as ex:
                 raise ChatClientException(f"Ollama chat request failed : {ex}", ex) from ex
diff --git a/python/packages/ollama/tests/test_ollama_chat_client.py b/python/packages/ollama/tests/test_ollama_chat_client.py
index 6e10830bf4..f52c8a29a5 100644
--- a/python/packages/ollama/tests/test_ollama_chat_client.py
+++ b/python/packages/ollama/tests/test_ollama_chat_client.py
@@ -404,6 +404,64 @@ async def test_cmc_with_dict_tool_passthrough(
     assert call_kwargs["tools"] == [{"type": "function", "function": {"name": "custom_tool", "parameters": {}}}]
 
 
+@patch.object(AsyncClient, "chat", new_callable=AsyncMock)
+async def test_cmc_filters_unsupported_kwargs(
+    mock_chat: AsyncMock,
+    ollama_unit_test_env: dict[str, str],
+    chat_history: list[Message],
+    mock_chat_completion_response: OllamaChatResponse,
+) -> None:
+    """Verify that unsupported kwargs (e.g. allow_multiple_tool_calls) are
+    silently filtered out and never forwarded to ollama.AsyncClient.chat().
+
+    Regression test for https://github.com/microsoft/agent-framework/issues/4402
+    """
+    mock_chat.return_value = mock_chat_completion_response
+    chat_history.append(Message(text="hello world", role="user"))
+
+    ollama_client = OllamaChatClient()
+    # Pass allow_multiple_tool_calls as a kwarg — this is what HandoffBuilder does
+    await ollama_client.get_response(
+        messages=chat_history,
+        allow_multiple_tool_calls=True,
+    )
+
+    # Verify the call succeeded and allow_multiple_tool_calls was NOT forwarded
+    mock_chat.assert_called_once()
+    call_kwargs = mock_chat.call_args.kwargs
+    assert "allow_multiple_tool_calls" not in call_kwargs
+
+
+@patch.object(AsyncClient, "chat", new_callable=AsyncMock)
+async def test_cmc_streaming_filters_unsupported_kwargs(
+    mock_chat: AsyncMock,
+    ollama_unit_test_env: dict[str, str],
+    chat_history: list[Message],
+    mock_streaming_chat_completion_response: AsyncStream[OllamaChatResponse],
+) -> None:
+    """Verify that unsupported kwargs are filtered in streaming mode too.
+
+    Regression test for https://github.com/microsoft/agent-framework/issues/4402
+    """
+    mock_chat.return_value = mock_streaming_chat_completion_response
+    chat_history.append(Message(text="hello world", role="user"))
+
+    ollama_client = OllamaChatClient()
+    result = ollama_client.get_response(
+        messages=chat_history,
+        stream=True,
+        allow_multiple_tool_calls=True,
+    )
+
+    async for chunk in result:
+        assert chunk.text == "test"
+
+    # Verify allow_multiple_tool_calls was NOT forwarded
+    mock_chat.assert_called_once()
+    call_kwargs = mock_chat.call_args.kwargs
+    assert "allow_multiple_tool_calls" not in call_kwargs
+
+
 @patch.object(AsyncClient, "chat", new_callable=AsyncMock)
 async def test_cmc_with_data_content_type(
     mock_chat: AsyncMock,

From f9b01dcb276916183ea715ec1e2d356ee206ddc0 Mon Sep 17 00:00:00 2001
From: "L. Elaine Dazzio" <elaine.dazzio@gmail.com>
Date: Mon, 2 Mar 2026 17:50:22 -0500
Subject: [PATCH 2/3] fix: also strip allow_multiple_tool_calls from options
 dict

Address Copilot review feedback: allow_multiple_tool_calls can also
arrive via the options mapping (e.g. Agent.default_options, workflow
cloning), not just via **kwargs.

- Add allow_multiple_tool_calls to exclude_keys in _prepare_options()
- Add test_cmc_filters_unsupported_options regression test
---
 .../agent_framework_ollama/_chat_client.py    |  5 +--
 .../ollama/tests/test_ollama_chat_client.py   | 31 ++++++++++++++++++-
 2 files changed, 33 insertions(+), 3 deletions(-)

diff --git a/python/packages/ollama/agent_framework_ollama/_chat_client.py b/python/packages/ollama/agent_framework_ollama/_chat_client.py
index fc0072df28..cdfb884d74 100644
--- a/python/packages/ollama/agent_framework_ollama/_chat_client.py
+++ b/python/packages/ollama/agent_framework_ollama/_chat_client.py
@@ -407,8 +407,9 @@ def _prepare_options(self, messages: Sequence[Message], options: Mapping[str, An
 
             messages = prepend_instructions_to_messages(list(messages), instructions, role="system")
 
-        # Keys to exclude from processing
-        exclude_keys = {"instructions", "tool_choice"}
+        # Keys to exclude from processing — these are either handled separately
+        # or not supported by the Ollama API.
+        exclude_keys = {"instructions", "tool_choice", "allow_multiple_tool_calls"}
 
         # Build run_options and model_options separately
         run_options: dict[str, Any] = {}
diff --git a/python/packages/ollama/tests/test_ollama_chat_client.py b/python/packages/ollama/tests/test_ollama_chat_client.py
index f52c8a29a5..81cafe0cfe 100644
--- a/python/packages/ollama/tests/test_ollama_chat_client.py
+++ b/python/packages/ollama/tests/test_ollama_chat_client.py
@@ -420,7 +420,7 @@ async def test_cmc_filters_unsupported_kwargs(
     chat_history.append(Message(text="hello world", role="user"))
 
     ollama_client = OllamaChatClient()
-    # Pass allow_multiple_tool_calls as a kwarg — this is what HandoffBuilder does
+    # Pass allow_multiple_tool_calls as a top-level kwarg — this is what HandoffBuilder does
     await ollama_client.get_response(
         messages=chat_history,
         allow_multiple_tool_calls=True,
@@ -462,6 +462,35 @@ async def test_cmc_streaming_filters_unsupported_kwargs(
     assert "allow_multiple_tool_calls" not in call_kwargs
 
 
+@patch.object(AsyncClient, "chat", new_callable=AsyncMock)
+async def test_cmc_filters_unsupported_options(
+    mock_chat: AsyncMock,
+    ollama_unit_test_env: dict[str, str],
+    chat_history: list[Message],
+    mock_chat_completion_response: OllamaChatResponse,
+) -> None:
+    """Verify that unsupported keys inside the options dict (e.g. from
+    Agent.default_options or workflow cloning) are also stripped before
+    reaching ollama.AsyncClient.chat().
+
+    Regression test for https://github.com/microsoft/agent-framework/issues/4402
+    """
+    mock_chat.return_value = mock_chat_completion_response
+    chat_history.append(Message(text="hello world", role="user"))
+
+    ollama_client = OllamaChatClient()
+    # Pass allow_multiple_tool_calls inside the options dict
+    await ollama_client.get_response(
+        messages=chat_history,
+        options={"allow_multiple_tool_calls": True},
+    )
+
+    # Verify the call succeeded and allow_multiple_tool_calls was NOT forwarded
+    mock_chat.assert_called_once()
+    call_kwargs = mock_chat.call_args.kwargs
+    assert "allow_multiple_tool_calls" not in call_kwargs
+
+
 @patch.object(AsyncClient, "chat", new_callable=AsyncMock)
 async def test_cmc_with_data_content_type(
     mock_chat: AsyncMock,

From 3948f4d595f557f2b010443077ee52f3e3f0d87e Mon Sep 17 00:00:00 2001
From: "L. Elaine Dazzio" <elaine.dazzio@gmail.com>
Date: Mon, 2 Mar 2026 18:01:36 -0500
Subject: [PATCH 3/3] fix: update comments to not reference downstream modules

Address TaoChenOSU nit: use generic 'framework-level kwargs' wording in
both comments instead of mentioning HandoffBuilder specifically.
---
 .../ollama/agent_framework_ollama/_chat_client.py     | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/python/packages/ollama/agent_framework_ollama/_chat_client.py b/python/packages/ollama/agent_framework_ollama/_chat_client.py
index cdfb884d74..e4f3d65ceb 100644
--- a/python/packages/ollama/agent_framework_ollama/_chat_client.py
+++ b/python/packages/ollama/agent_framework_ollama/_chat_client.py
@@ -268,9 +268,9 @@ class OllamaChatOptions(ChatOptions[ResponseModelT], Generic[ResponseModelT], to
 }
 """Maps ChatOptions keys to Ollama model option parameter names."""
 
-# Kwargs that may be injected by orchestration layers (e.g. HandoffBuilder)
-# but are not supported by ollama.AsyncClient.chat().  These are silently
-# stripped in _inner_get_response so they never reach the Ollama API.
+# Framework-level kwargs that are not supported by ollama.AsyncClient.chat().
+# These are silently stripped in _inner_get_response so they never reach the
+# Ollama API.
 _UNSUPPORTED_CHAT_KWARGS: set[str] = {
     "allow_multiple_tool_calls",
 }
@@ -358,9 +358,8 @@ def _inner_get_response(
         stream: bool = False,
         **kwargs: Any,
     ) -> Awaitable[ChatResponse] | ResponseStream[ChatResponseUpdate, ChatResponse]:
-        # Filter out kwargs that are not supported by ollama.AsyncClient.chat().
-        # Orchestration layers (e.g. HandoffBuilder) may inject kwargs like
-        # allow_multiple_tool_calls that the Ollama Python client doesn't accept.
+        # Filter out framework-level kwargs that are not supported by
+        # ollama.AsyncClient.chat().
         filtered_kwargs = {k: v for k, v in kwargs.items() if k not in _UNSUPPORTED_CHAT_KWARGS}
 
         if stream: