Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 16 additions & 4 deletions python/packages/ollama/agent_framework_ollama/_chat_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,6 +268,13 @@ class OllamaChatOptions(ChatOptions[ResponseModelT], Generic[ResponseModelT], to
}
"""Maps ChatOptions keys to Ollama model option parameter names."""

# Framework-level kwargs that are not supported by ollama.AsyncClient.chat().
# These are silently stripped in _inner_get_response so they never reach the
# Ollama API.
_UNSUPPORTED_CHAT_KWARGS: set[str] = {
"allow_multiple_tool_calls",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this is not an approach we want to take. Since that parameter is already set to None in the OllamaChatOptions, people will get notified by their IDE if it is set; if it is being set by something like a workflow, then we need to update that. The reason we don't want to hardcode filtering like this is that we don't want to block future updates: say at some point Ollama does implement support for this parameter — then it would not be usable, because we filter it out. So we decided we would prefer to use the None above to notify the user beforehand that it is likely not right, and then we just let the API tell the user exactly what's wrong.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So reading more of the above, the real issue is that the HandoffBuilder sets parameters that are not universally applicable, which is what we would need to fix.

}

OllamaChatOptionsT = TypeVar("OllamaChatOptionsT", bound=TypedDict, default="OllamaChatOptions", covariant=True) # type: ignore[valid-type]


Expand Down Expand Up @@ -351,6 +358,10 @@ def _inner_get_response(
stream: bool = False,
**kwargs: Any,
) -> Awaitable[ChatResponse] | ResponseStream[ChatResponseUpdate, ChatResponse]:
# Filter out framework-level kwargs that are not supported by
# ollama.AsyncClient.chat().
filtered_kwargs = {k: v for k, v in kwargs.items() if k not in _UNSUPPORTED_CHAT_KWARGS}

if stream:
# Streaming mode
async def _stream() -> AsyncIterable[ChatResponseUpdate]:
Expand All @@ -360,7 +371,7 @@ async def _stream() -> AsyncIterable[ChatResponseUpdate]:
response_object: AsyncIterable[OllamaChatResponse] = await self.client.chat( # type: ignore[misc]
stream=True,
**options_dict,
**kwargs,
**filtered_kwargs,
)
except Exception as ex:
raise ChatClientException(f"Ollama streaming chat request failed : {ex}", ex) from ex
Expand All @@ -378,7 +389,7 @@ async def _get_response() -> ChatResponse:
response: OllamaChatResponse = await self.client.chat( # type: ignore[misc]
stream=False,
**options_dict,
**kwargs,
**filtered_kwargs,
)
except Exception as ex:
raise ChatClientException(f"Ollama chat request failed : {ex}", ex) from ex
Expand All @@ -395,8 +406,9 @@ def _prepare_options(self, messages: Sequence[Message], options: Mapping[str, An

messages = prepend_instructions_to_messages(list(messages), instructions, role="system")

# Keys to exclude from processing
exclude_keys = {"instructions", "tool_choice"}
# Keys to exclude from processing — these are either handled separately
# or not supported by the Ollama API.
exclude_keys = {"instructions", "tool_choice", "allow_multiple_tool_calls"}

# Build run_options and model_options separately
run_options: dict[str, Any] = {}
Expand Down
87 changes: 87 additions & 0 deletions python/packages/ollama/tests/test_ollama_chat_client.py
Original file line number Diff line number Diff line change
Expand Up @@ -404,6 +404,93 @@ async def test_cmc_with_dict_tool_passthrough(
assert call_kwargs["tools"] == [{"type": "function", "function": {"name": "custom_tool", "parameters": {}}}]


@patch.object(AsyncClient, "chat", new_callable=AsyncMock)
async def test_cmc_filters_unsupported_kwargs(
    mock_chat: AsyncMock,
    ollama_unit_test_env: dict[str, str],
    chat_history: list[Message],
    mock_chat_completion_response: OllamaChatResponse,
) -> None:
    """Framework-only kwargs (e.g. ``allow_multiple_tool_calls``) must be
    dropped before the request reaches ``ollama.AsyncClient.chat()``.

    Regression test for https://github.com/microsoft/agent-framework/issues/4402
    """
    mock_chat.return_value = mock_chat_completion_response
    chat_history.append(Message(text="hello world", role="user"))

    client = OllamaChatClient()
    # HandoffBuilder forwards allow_multiple_tool_calls as a top-level kwarg;
    # emulate that here.
    await client.get_response(
        messages=chat_history,
        allow_multiple_tool_calls=True,
    )

    # Exactly one request went out, and the filtered kwarg never reached it.
    mock_chat.assert_called_once()
    forwarded = mock_chat.call_args.kwargs
    assert "allow_multiple_tool_calls" not in forwarded


@patch.object(AsyncClient, "chat", new_callable=AsyncMock)
async def test_cmc_streaming_filters_unsupported_kwargs(
    mock_chat: AsyncMock,
    ollama_unit_test_env: dict[str, str],
    chat_history: list[Message],
    mock_streaming_chat_completion_response: AsyncStream[OllamaChatResponse],
) -> None:
    """The unsupported-kwarg filter must also apply on the streaming path.

    Regression test for https://github.com/microsoft/agent-framework/issues/4402
    """
    mock_chat.return_value = mock_streaming_chat_completion_response
    chat_history.append(Message(text="hello world", role="user"))

    client = OllamaChatClient()
    stream = client.get_response(
        messages=chat_history,
        stream=True,
        allow_multiple_tool_calls=True,
    )

    # Drain the stream; every update should carry the mocked text.
    async for update in stream:
        assert update.text == "test"

    # The filtered kwarg must never have been forwarded to the Ollama client.
    mock_chat.assert_called_once()
    forwarded = mock_chat.call_args.kwargs
    assert "allow_multiple_tool_calls" not in forwarded


@patch.object(AsyncClient, "chat", new_callable=AsyncMock)
async def test_cmc_filters_unsupported_options(
    mock_chat: AsyncMock,
    ollama_unit_test_env: dict[str, str],
    chat_history: list[Message],
    mock_chat_completion_response: OllamaChatResponse,
) -> None:
    """Unsupported keys carried inside the ``options`` mapping (as produced by
    ``Agent.default_options`` or workflow cloning) must likewise be stripped
    before reaching ``ollama.AsyncClient.chat()``.

    Regression test for https://github.com/microsoft/agent-framework/issues/4402
    """
    mock_chat.return_value = mock_chat_completion_response
    chat_history.append(Message(text="hello world", role="user"))

    client = OllamaChatClient()
    # Same parameter as the kwarg tests, but routed via the options dict.
    await client.get_response(
        messages=chat_history,
        options={"allow_multiple_tool_calls": True},
    )

    # One successful call; the unsupported option never made it through.
    mock_chat.assert_called_once()
    forwarded = mock_chat.call_args.kwargs
    assert "allow_multiple_tool_calls" not in forwarded


@patch.object(AsyncClient, "chat", new_callable=AsyncMock)
async def test_cmc_with_data_content_type(
mock_chat: AsyncMock,
Expand Down