diff --git a/python/packages/ollama/agent_framework_ollama/_chat_client.py b/python/packages/ollama/agent_framework_ollama/_chat_client.py
index cc7fc0c9a7..e4f3d65ceb 100644
--- a/python/packages/ollama/agent_framework_ollama/_chat_client.py
+++ b/python/packages/ollama/agent_framework_ollama/_chat_client.py
@@ -268,6 +268,13 @@ class OllamaChatOptions(ChatOptions[ResponseModelT], Generic[ResponseModelT], to
 }
 """Maps ChatOptions keys to Ollama model option parameter names."""
 
+# Framework-level kwargs that are not supported by ollama.AsyncClient.chat().
+# These are silently stripped in _inner_get_response so they never reach the
+# Ollama API.
+_UNSUPPORTED_CHAT_KWARGS: set[str] = {
+    "allow_multiple_tool_calls",
+}
+
 
 OllamaChatOptionsT = TypeVar("OllamaChatOptionsT", bound=TypedDict, default="OllamaChatOptions", covariant=True)  # type: ignore[valid-type]
 
@@ -351,6 +358,10 @@ def _inner_get_response(
         stream: bool = False,
         **kwargs: Any,
     ) -> Awaitable[ChatResponse] | ResponseStream[ChatResponseUpdate, ChatResponse]:
+        # Filter out framework-level kwargs that are not supported by
+        # ollama.AsyncClient.chat().
+        filtered_kwargs = {k: v for k, v in kwargs.items() if k not in _UNSUPPORTED_CHAT_KWARGS}
+
         if stream:
             # Streaming mode
             async def _stream() -> AsyncIterable[ChatResponseUpdate]:
@@ -360,7 +371,7 @@ async def _stream() -> AsyncIterable[ChatResponseUpdate]:
                     response_object: AsyncIterable[OllamaChatResponse] = await self.client.chat(  # type: ignore[misc]
                         stream=True,
                         **options_dict,
-                        **kwargs,
+                        **filtered_kwargs,
                     )
                 except Exception as ex:
                     raise ChatClientException(f"Ollama streaming chat request failed : {ex}", ex) from ex
@@ -378,7 +389,7 @@ async def _get_response() -> ChatResponse:
                 response: OllamaChatResponse = await self.client.chat(  # type: ignore[misc]
                     stream=False,
                     **options_dict,
-                    **kwargs,
+                    **filtered_kwargs,
                 )
             except Exception as ex:
                 raise ChatClientException(f"Ollama chat request failed : {ex}", ex) from ex
@@ -395,8 +406,9 @@ def _prepare_options(self, messages: Sequence[Message], options: Mapping[str, An
 
         messages = prepend_instructions_to_messages(list(messages), instructions, role="system")
 
-        # Keys to exclude from processing
-        exclude_keys = {"instructions", "tool_choice"}
+        # Keys to exclude from processing — these are either handled separately
+        # or not supported by the Ollama API.
+        exclude_keys = {"instructions", "tool_choice", "allow_multiple_tool_calls"}
 
         # Build run_options and model_options separately
         run_options: dict[str, Any] = {}
diff --git a/python/packages/ollama/tests/test_ollama_chat_client.py b/python/packages/ollama/tests/test_ollama_chat_client.py
index 6e10830bf4..81cafe0cfe 100644
--- a/python/packages/ollama/tests/test_ollama_chat_client.py
+++ b/python/packages/ollama/tests/test_ollama_chat_client.py
@@ -404,6 +404,93 @@ async def test_cmc_with_dict_tool_passthrough(
     assert call_kwargs["tools"] == [{"type": "function", "function": {"name": "custom_tool", "parameters": {}}}]
 
 
+@patch.object(AsyncClient, "chat", new_callable=AsyncMock)
+async def test_cmc_filters_unsupported_kwargs(
+    mock_chat: AsyncMock,
+    ollama_unit_test_env: dict[str, str],
+    chat_history: list[Message],
+    mock_chat_completion_response: OllamaChatResponse,
+) -> None:
+    """Verify that unsupported kwargs (e.g. allow_multiple_tool_calls) are
+    silently filtered out and never forwarded to ollama.AsyncClient.chat().
+
+    Regression test for https://github.com/microsoft/agent-framework/issues/4402
+    """
+    mock_chat.return_value = mock_chat_completion_response
+    chat_history.append(Message(text="hello world", role="user"))
+
+    ollama_client = OllamaChatClient()
+    # Pass allow_multiple_tool_calls as a top-level kwarg — this is what HandoffBuilder does
+    await ollama_client.get_response(
+        messages=chat_history,
+        allow_multiple_tool_calls=True,
+    )
+
+    # Verify the call succeeded and allow_multiple_tool_calls was NOT forwarded
+    mock_chat.assert_called_once()
+    call_kwargs = mock_chat.call_args.kwargs
+    assert "allow_multiple_tool_calls" not in call_kwargs
+
+
+@patch.object(AsyncClient, "chat", new_callable=AsyncMock)
+async def test_cmc_streaming_filters_unsupported_kwargs(
+    mock_chat: AsyncMock,
+    ollama_unit_test_env: dict[str, str],
+    chat_history: list[Message],
+    mock_streaming_chat_completion_response: AsyncStream[OllamaChatResponse],
+) -> None:
+    """Verify that unsupported kwargs are filtered in streaming mode too.
+
+    Regression test for https://github.com/microsoft/agent-framework/issues/4402
+    """
+    mock_chat.return_value = mock_streaming_chat_completion_response
+    chat_history.append(Message(text="hello world", role="user"))
+
+    ollama_client = OllamaChatClient()
+    result = ollama_client.get_response(
+        messages=chat_history,
+        stream=True,
+        allow_multiple_tool_calls=True,
+    )
+
+    async for chunk in result:
+        assert chunk.text == "test"
+
+    # Verify allow_multiple_tool_calls was NOT forwarded
+    mock_chat.assert_called_once()
+    call_kwargs = mock_chat.call_args.kwargs
+    assert "allow_multiple_tool_calls" not in call_kwargs
+
+
+@patch.object(AsyncClient, "chat", new_callable=AsyncMock)
+async def test_cmc_filters_unsupported_options(
+    mock_chat: AsyncMock,
+    ollama_unit_test_env: dict[str, str],
+    chat_history: list[Message],
+    mock_chat_completion_response: OllamaChatResponse,
+) -> None:
+    """Verify that unsupported keys inside the options dict (e.g. from
+    Agent.default_options or workflow cloning) are also stripped before
+    reaching ollama.AsyncClient.chat().
+
+    Regression test for https://github.com/microsoft/agent-framework/issues/4402
+    """
+    mock_chat.return_value = mock_chat_completion_response
+    chat_history.append(Message(text="hello world", role="user"))
+
+    ollama_client = OllamaChatClient()
+    # Pass allow_multiple_tool_calls inside the options dict
+    await ollama_client.get_response(
+        messages=chat_history,
+        options={"allow_multiple_tool_calls": True},
+    )
+
+    # Verify the call succeeded and allow_multiple_tool_calls was NOT forwarded
+    mock_chat.assert_called_once()
+    call_kwargs = mock_chat.call_args.kwargs
+    assert "allow_multiple_tool_calls" not in call_kwargs
+
+
 @patch.object(AsyncClient, "chat", new_callable=AsyncMock)
 async def test_cmc_with_data_content_type(
     mock_chat: AsyncMock,