From febe65dc3f26f30844f2cb878bc6771aee0e9bcb Mon Sep 17 00:00:00 2001 From: "L. Elaine Dazzio" Date: Mon, 2 Mar 2026 17:41:02 -0500 Subject: [PATCH 1/3] fix(python): filter unsupported kwargs in OllamaChatClient._inner_get_response Orchestration layers like HandoffBuilder inject kwargs such as allow_multiple_tool_calls that ollama.AsyncClient.chat() does not accept, causing a TypeError. Filter these kwargs before forwarding to the Ollama API, matching the behavior documented in OllamaChatOptions where these fields are explicitly marked as unsupported. Fixes #4402 --- .../agent_framework_ollama/_chat_client.py | 16 ++++- .../ollama/tests/test_ollama_chat_client.py | 58 +++++++++++++++++++ 2 files changed, 72 insertions(+), 2 deletions(-) diff --git a/python/packages/ollama/agent_framework_ollama/_chat_client.py b/python/packages/ollama/agent_framework_ollama/_chat_client.py index cc7fc0c9a7..fc0072df28 100644 --- a/python/packages/ollama/agent_framework_ollama/_chat_client.py +++ b/python/packages/ollama/agent_framework_ollama/_chat_client.py @@ -268,6 +268,13 @@ class OllamaChatOptions(ChatOptions[ResponseModelT], Generic[ResponseModelT], to } """Maps ChatOptions keys to Ollama model option parameter names.""" +# Kwargs that may be injected by orchestration layers (e.g. HandoffBuilder) +# but are not supported by ollama.AsyncClient.chat(). These are silently +# stripped in _inner_get_response so they never reach the Ollama API. +_UNSUPPORTED_CHAT_KWARGS: set[str] = { + "allow_multiple_tool_calls", +} + OllamaChatOptionsT = TypeVar("OllamaChatOptionsT", bound=TypedDict, default="OllamaChatOptions", covariant=True) # type: ignore[valid-type] @@ -351,6 +358,11 @@ def _inner_get_response( stream: bool = False, **kwargs: Any, ) -> Awaitable[ChatResponse] | ResponseStream[ChatResponseUpdate, ChatResponse]: + # Filter out kwargs that are not supported by ollama.AsyncClient.chat(). + # Orchestration layers (e.g. HandoffBuilder) may inject kwargs like + # allow_multiple_tool_calls that the Ollama Python client doesn't accept. + filtered_kwargs = {k: v for k, v in kwargs.items() if k not in _UNSUPPORTED_CHAT_KWARGS} + if stream: # Streaming mode async def _stream() -> AsyncIterable[ChatResponseUpdate]: @@ -360,7 +372,7 @@ async def _stream() -> AsyncIterable[ChatResponseUpdate]: response_object: AsyncIterable[OllamaChatResponse] = await self.client.chat( # type: ignore[misc] stream=True, **options_dict, - **kwargs, + **filtered_kwargs, ) except Exception as ex: raise ChatClientException(f"Ollama streaming chat request failed : {ex}", ex) from ex @@ -378,7 +390,7 @@ async def _get_response() -> ChatResponse: response: OllamaChatResponse = await self.client.chat( # type: ignore[misc] stream=False, **options_dict, - **kwargs, + **filtered_kwargs, ) except Exception as ex: raise ChatClientException(f"Ollama chat request failed : {ex}", ex) from ex diff --git a/python/packages/ollama/tests/test_ollama_chat_client.py b/python/packages/ollama/tests/test_ollama_chat_client.py index 6e10830bf4..f52c8a29a5 100644 --- a/python/packages/ollama/tests/test_ollama_chat_client.py +++ b/python/packages/ollama/tests/test_ollama_chat_client.py @@ -404,6 +404,64 @@ async def test_cmc_with_dict_tool_passthrough( assert call_kwargs["tools"] == [{"type": "function", "function": {"name": "custom_tool", "parameters": {}}}] +@patch.object(AsyncClient, "chat", new_callable=AsyncMock) +async def test_cmc_filters_unsupported_kwargs( + mock_chat: AsyncMock, + ollama_unit_test_env: dict[str, str], + chat_history: list[Message], + mock_chat_completion_response: OllamaChatResponse, +) -> None: + """Verify that unsupported kwargs (e.g. allow_multiple_tool_calls) are + silently filtered out and never forwarded to ollama.AsyncClient.chat(). + + Regression test for https://github.com/microsoft/agent-framework/issues/4402 + """ + mock_chat.return_value = mock_chat_completion_response + chat_history.append(Message(text="hello world", role="user")) + + ollama_client = OllamaChatClient() + # Pass allow_multiple_tool_calls as a kwarg — this is what HandoffBuilder does + await ollama_client.get_response( + messages=chat_history, + allow_multiple_tool_calls=True, + ) + + # Verify the call succeeded and allow_multiple_tool_calls was NOT forwarded + mock_chat.assert_called_once() + call_kwargs = mock_chat.call_args.kwargs + assert "allow_multiple_tool_calls" not in call_kwargs + + +@patch.object(AsyncClient, "chat", new_callable=AsyncMock) +async def test_cmc_streaming_filters_unsupported_kwargs( + mock_chat: AsyncMock, + ollama_unit_test_env: dict[str, str], + chat_history: list[Message], + mock_streaming_chat_completion_response: AsyncStream[OllamaChatResponse], +) -> None: + """Verify that unsupported kwargs are filtered in streaming mode too. + + Regression test for https://github.com/microsoft/agent-framework/issues/4402 + """ + mock_chat.return_value = mock_streaming_chat_completion_response + chat_history.append(Message(text="hello world", role="user")) + + ollama_client = OllamaChatClient() + result = ollama_client.get_response( + messages=chat_history, + stream=True, + allow_multiple_tool_calls=True, + ) + + async for chunk in result: + assert chunk.text == "test" + + # Verify allow_multiple_tool_calls was NOT forwarded + mock_chat.assert_called_once() + call_kwargs = mock_chat.call_args.kwargs + assert "allow_multiple_tool_calls" not in call_kwargs + + @patch.object(AsyncClient, "chat", new_callable=AsyncMock) async def test_cmc_with_data_content_type( mock_chat: AsyncMock, From f9b01dcb276916183ea715ec1e2d356ee206ddc0 Mon Sep 17 00:00:00 2001 From: "L. Elaine Dazzio" Date: Mon, 2 Mar 2026 17:50:22 -0500 Subject: [PATCH 2/3] fix: also strip allow_multiple_tool_calls from options dict Address Copilot review feedback: allow_multiple_tool_calls can also arrive via the options mapping (e.g. Agent.default_options, workflow cloning), not just via **kwargs. - Add allow_multiple_tool_calls to exclude_keys in _prepare_options() - Add test_cmc_filters_unsupported_options regression test --- .../agent_framework_ollama/_chat_client.py | 5 +-- .../ollama/tests/test_ollama_chat_client.py | 31 ++++++++++++++++++- 2 files changed, 33 insertions(+), 3 deletions(-) diff --git a/python/packages/ollama/agent_framework_ollama/_chat_client.py b/python/packages/ollama/agent_framework_ollama/_chat_client.py index fc0072df28..cdfb884d74 100644 --- a/python/packages/ollama/agent_framework_ollama/_chat_client.py +++ b/python/packages/ollama/agent_framework_ollama/_chat_client.py @@ -407,8 +407,9 @@ def _prepare_options(self, messages: Sequence[Message], options: Mapping[str, An messages = prepend_instructions_to_messages(list(messages), instructions, role="system") - # Keys to exclude from processing - exclude_keys = {"instructions", "tool_choice"} + # Keys to exclude from processing — these are either handled separately + # or not supported by the Ollama API. + exclude_keys = {"instructions", "tool_choice", "allow_multiple_tool_calls"} # Build run_options and model_options separately run_options: dict[str, Any] = {} diff --git a/python/packages/ollama/tests/test_ollama_chat_client.py b/python/packages/ollama/tests/test_ollama_chat_client.py index f52c8a29a5..81cafe0cfe 100644 --- a/python/packages/ollama/tests/test_ollama_chat_client.py +++ b/python/packages/ollama/tests/test_ollama_chat_client.py @@ -420,7 +420,7 @@ async def test_cmc_filters_unsupported_kwargs( chat_history.append(Message(text="hello world", role="user")) ollama_client = OllamaChatClient() - # Pass allow_multiple_tool_calls as a kwarg — this is what HandoffBuilder does + # Pass allow_multiple_tool_calls as a top-level kwarg — this is what HandoffBuilder does await ollama_client.get_response( messages=chat_history, allow_multiple_tool_calls=True, @@ -462,6 +462,35 @@ async def test_cmc_streaming_filters_unsupported_kwargs( assert "allow_multiple_tool_calls" not in call_kwargs +@patch.object(AsyncClient, "chat", new_callable=AsyncMock) +async def test_cmc_filters_unsupported_options( + mock_chat: AsyncMock, + ollama_unit_test_env: dict[str, str], + chat_history: list[Message], + mock_chat_completion_response: OllamaChatResponse, +) -> None: + """Verify that unsupported keys inside the options dict (e.g. from + Agent.default_options or workflow cloning) are also stripped before + reaching ollama.AsyncClient.chat(). + + Regression test for https://github.com/microsoft/agent-framework/issues/4402 + """ + mock_chat.return_value = mock_chat_completion_response + chat_history.append(Message(text="hello world", role="user")) + + ollama_client = OllamaChatClient() + # Pass allow_multiple_tool_calls inside the options dict + await ollama_client.get_response( + messages=chat_history, + options={"allow_multiple_tool_calls": True}, + ) + + # Verify the call succeeded and allow_multiple_tool_calls was NOT forwarded + mock_chat.assert_called_once() + call_kwargs = mock_chat.call_args.kwargs + assert "allow_multiple_tool_calls" not in call_kwargs + + @patch.object(AsyncClient, "chat", new_callable=AsyncMock) async def test_cmc_with_data_content_type( mock_chat: AsyncMock, From 3948f4d595f557f2b010443077ee52f3e3f0d87e Mon Sep 17 00:00:00 2001 From: "L. Elaine Dazzio" Date: Mon, 2 Mar 2026 18:01:36 -0500 Subject: [PATCH 3/3] fix: update comment to not reference downstream modules Address TaoChenOSU nit: use generic 'framework-level kwargs' wording instead of mentioning HandoffBuilder specifically. --- .../ollama/agent_framework_ollama/_chat_client.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/python/packages/ollama/agent_framework_ollama/_chat_client.py b/python/packages/ollama/agent_framework_ollama/_chat_client.py index cdfb884d74..e4f3d65ceb 100644 --- a/python/packages/ollama/agent_framework_ollama/_chat_client.py +++ b/python/packages/ollama/agent_framework_ollama/_chat_client.py @@ -268,9 +268,9 @@ class OllamaChatOptions(ChatOptions[ResponseModelT], Generic[ResponseModelT], to } """Maps ChatOptions keys to Ollama model option parameter names.""" -# Kwargs that may be injected by orchestration layers (e.g. HandoffBuilder) -# but are not supported by ollama.AsyncClient.chat(). These are silently -# stripped in _inner_get_response so they never reach the Ollama API. +# Framework-level kwargs that are not supported by ollama.AsyncClient.chat(). +# These are silently stripped in _inner_get_response so they never reach the +# Ollama API. _UNSUPPORTED_CHAT_KWARGS: set[str] = { "allow_multiple_tool_calls", } @@ -358,9 +358,8 @@ def _inner_get_response( stream: bool = False, **kwargs: Any, ) -> Awaitable[ChatResponse] | ResponseStream[ChatResponseUpdate, ChatResponse]: - # Filter out kwargs that are not supported by ollama.AsyncClient.chat(). - # Orchestration layers (e.g. HandoffBuilder) may inject kwargs like - # allow_multiple_tool_calls that the Ollama Python client doesn't accept. + # Filter out framework-level kwargs that are not supported by + # ollama.AsyncClient.chat(). filtered_kwargs = {k: v for k, v in kwargs.items() if k not in _UNSUPPORTED_CHAT_KWARGS} if stream: