diff --git a/python/packages/anthropic/agent_framework_anthropic/_chat_client.py b/python/packages/anthropic/agent_framework_anthropic/_chat_client.py index acfc1b0180..f9c2b99a6b 100644 --- a/python/packages/anthropic/agent_framework_anthropic/_chat_client.py +++ b/python/packages/anthropic/agent_framework_anthropic/_chat_client.py @@ -4,7 +4,7 @@ import logging import sys -from collections.abc import AsyncIterable, Awaitable, Mapping, MutableMapping, Sequence +from collections.abc import AsyncIterable, Awaitable, Callable, Mapping, MutableMapping, Sequence from typing import Any, ClassVar, Final, Generic, Literal, TypedDict from agent_framework import ( @@ -25,8 +25,10 @@ ResponseStream, TextSpanRegion, UsageDetails, + tool, ) from agent_framework._settings import SecretString, load_settings +from agent_framework._tools import SHELL_TOOL_KIND_VALUE from agent_framework._types import _get_data_bytes_as_str # type: ignore from agent_framework.observability import ChatTelemetryLayer from anthropic import AsyncAnthropic @@ -326,6 +328,7 @@ class MyOptions(AnthropicChatOptions, total=False): # streaming requires tracking the last function call ID, name, and content type self._last_call_id_name: tuple[str, str] | None = None self._last_call_content_type: str | None = None + self._tool_name_aliases: dict[str, str] = {} # region Static factory methods for hosted tools @@ -379,6 +382,57 @@ def get_web_search_tool( """ return {"type": type_name or "web_search_20250305", "name": name} + @staticmethod + def get_shell_tool( + *, + func: Callable[..., Any] | FunctionTool, + description: str | None = None, + type_name: str | None = None, + approval_mode: Literal["always_require", "never_require"] | None = None, + ) -> FunctionTool: + """Create a local shell FunctionTool for Anthropic. + + This helper wraps ``func`` as a shell-enabled ``FunctionTool`` for local + execution and configures Anthropic API declaration details via metadata. 
+ + Anthropic always exposes this tool to the model as ``name="bash"`` and + executes it using a ``bash_*`` tool type. + + Keyword Args: + func: Python callable or ``FunctionTool`` that executes the requested shell command. + description: Optional tool description shown to the model. + type_name: Optional Anthropic shell tool type override. + Defaults to ``"bash_20250124"`` when omitted. + approval_mode: Optional approval mode for local execution. + + Returns: + A shell-enabled ``FunctionTool`` suitable for ``ChatOptions.tools``. + """ + base_tool: FunctionTool + if isinstance(func, FunctionTool): + base_tool = func + if description is not None: + base_tool.description = description + if approval_mode is not None: + base_tool.approval_mode = approval_mode + else: + base_tool = tool( + func=func, + description=description, + approval_mode=approval_mode, + ) + + additional_properties: dict[str, Any] = dict(base_tool.additional_properties or {}) + if type_name: + additional_properties["type"] = type_name + + if base_tool.func is None: + raise ValueError("Shell tool requires an executable function.") + + base_tool.additional_properties = additional_properties + base_tool.kind = SHELL_TOOL_KIND_VALUE + return base_tool + @staticmethod def get_mcp_tool( *, @@ -715,8 +769,16 @@ def _prepare_tools_for_anthropic(self, options: Mapping[str, Any]) -> dict[str, if tools: tool_list: list[Any] = [] mcp_server_list: list[Any] = [] + tool_name_aliases: dict[str, str] = {} for tool in tools: - if isinstance(tool, FunctionTool): + if isinstance(tool, FunctionTool) and tool.kind == SHELL_TOOL_KIND_VALUE: + api_type = (tool.additional_properties or {}).get("type", "bash_20250124") + tool_name_aliases["bash"] = tool.name + tool_list.append({ + "type": api_type, + "name": "bash", + }) + elif isinstance(tool, FunctionTool): tool_list.append({ "type": "custom", "name": tool.name, @@ -744,6 +806,9 @@ def _prepare_tools_for_anthropic(self, options: Mapping[str, Any]) -> dict[str, 
result["tools"] = tool_list if mcp_server_list: result["mcp_servers"] = mcp_server_list + self._tool_name_aliases = tool_name_aliases + else: + self._tool_name_aliases = {} # Process tool choice if options.get("tool_choice") is None: @@ -760,9 +825,18 @@ def _prepare_tools_for_anthropic(self, options: Mapping[str, Any]) -> dict[str, result["tool_choice"] = tool_choice case "required": if "required_function_name" in tool_mode: + required_name = tool_mode["required_function_name"] + api_tool_name = next( + ( + api_name + for api_name, local_name in self._tool_name_aliases.items() + if local_name == required_name + ), + required_name, + ) tool_choice = { "type": "tool", - "name": tool_mode["required_function_name"], + "name": api_tool_name, } else: tool_choice = {"type": "any"} @@ -914,10 +988,11 @@ def _parse_contents_from_anthropic( ) ) else: + resolved_tool_name = self._tool_name_aliases.get(content_block.name, content_block.name) contents.append( Content.from_function_call( call_id=content_block.id, - name=content_block.name, + name=resolved_tool_name, arguments=content_block.input, raw_representation=content_block, ) @@ -1006,33 +1081,29 @@ def _parse_contents_from_anthropic( ) ) case "bash_code_execution_tool_result": - bash_outputs: list[Content] = [] + shell_outputs: list[Content] = [] if content_block.content: if isinstance( content_block.content, BetaBashCodeExecutionToolResultError, ): - bash_outputs.append( - Content.from_error( - message=content_block.content.error_code, + shell_outputs.append( + Content.from_shell_command_output( + stderr=content_block.content.error_code, + timed_out=content_block.content.error_code == "execution_time_exceeded", raw_representation=content_block.content, ) ) else: - if content_block.content.stdout: - bash_outputs.append( - Content.from_text( - text=content_block.content.stdout, - raw_representation=content_block.content, - ) - ) - if content_block.content.stderr: - bash_outputs.append( - Content.from_error( - 
message=content_block.content.stderr, - raw_representation=content_block.content, - ) + shell_outputs.append( + Content.from_shell_command_output( + stdout=content_block.content.stdout or None, + stderr=content_block.content.stderr or None, + exit_code=int(content_block.content.return_code), + timed_out=False, + raw_representation=content_block.content, ) + ) for bash_file_content in content_block.content.content: contents.append( Content.from_hosted_file( @@ -1041,9 +1112,9 @@ def _parse_contents_from_anthropic( ) ) contents.append( - Content.from_function_result( + Content.from_shell_tool_result( call_id=content_block.tool_use_id, - result=bash_outputs, + outputs=shell_outputs, raw_representation=content_block, ) ) diff --git a/python/packages/anthropic/tests/test_anthropic_client.py b/python/packages/anthropic/tests/test_anthropic_client.py index d7c4c9afc7..028e49673a 100644 --- a/python/packages/anthropic/tests/test_anthropic_client.py +++ b/python/packages/anthropic/tests/test_anthropic_client.py @@ -14,6 +14,7 @@ tool, ) from agent_framework._settings import load_settings +from agent_framework._tools import SHELL_TOOL_KIND_VALUE from anthropic.types.beta import ( BetaMessage, BetaTextBlock, @@ -40,6 +41,8 @@ def create_test_anthropic_client( anthropic_settings: AnthropicSettings | None = None, ) -> AnthropicClient: """Helper function to create AnthropicClient instances for testing, bypassing normal validation.""" + from agent_framework._tools import normalize_function_invocation_configuration + if anthropic_settings is None: anthropic_settings = load_settings( AnthropicSettings, @@ -55,9 +58,13 @@ def create_test_anthropic_client( client.anthropic_client = mock_anthropic_client client.model_id = model_id or anthropic_settings["chat_model_id"] client._last_call_id_name = None + client._tool_name_aliases = {} client.additional_properties = {} client.middleware = None client.additional_beta_flags = [] + client.chat_middleware = [] + client.function_middleware = 
[] + client.function_invocation_configuration = normalize_function_invocation_configuration(None) return client @@ -410,6 +417,87 @@ def test_prepare_tools_for_anthropic_code_interpreter(mock_anthropic_client: Mag assert result["tools"][0]["name"] == "code_execution" +def _dummy_bash(command: str) -> str: + return f"executed: {command}" + + +def test_prepare_tools_for_anthropic_shell_tool(mock_anthropic_client: MagicMock) -> None: + """Test converting tool-decorated FunctionTool to Anthropic bash format.""" + client = create_test_anthropic_client(mock_anthropic_client) + + @tool(kind=SHELL_TOOL_KIND_VALUE) + def run_bash(command: str) -> str: + return _dummy_bash(command) + + chat_options = ChatOptions(tools=[run_bash]) + + result = client._prepare_tools_for_anthropic(chat_options) + + assert result is not None + assert "tools" in result + assert len(result["tools"]) == 1 + assert result["tools"][0]["type"] == "bash_20250124" + assert result["tools"][0]["name"] == "bash" + + +def test_prepare_tools_for_anthropic_shell_tool_custom_type(mock_anthropic_client: MagicMock) -> None: + """Test shell tool with custom type via additional_properties.""" + client = create_test_anthropic_client(mock_anthropic_client) + + @tool(kind=SHELL_TOOL_KIND_VALUE, additional_properties={"type": "bash_20241022"}) + def run_bash(command: str) -> str: + return _dummy_bash(command) + + chat_options = ChatOptions(tools=[run_bash]) + + result = client._prepare_tools_for_anthropic(chat_options) + + assert result is not None + assert "tools" in result + assert result["tools"][0]["type"] == "bash_20241022" + assert result["tools"][0]["name"] == "bash" + + +def test_prepare_tools_for_anthropic_shell_tool_does_not_mutate_name(mock_anthropic_client: MagicMock) -> None: + """Shell tool API name should be 'bash' without mutating local FunctionTool name.""" + client = create_test_anthropic_client(mock_anthropic_client) + + @tool( + name="run_local_shell", + approval_mode="never_require", + 
kind=SHELL_TOOL_KIND_VALUE, + ) + def run_local_shell(command: str) -> str: + return command + + chat_options = ChatOptions(tools=[run_local_shell]) + result = client._prepare_tools_for_anthropic(chat_options) + + assert result is not None + assert result["tools"][0]["name"] == "bash" + assert run_local_shell.name == "run_local_shell" + + +def test_get_shell_tool_reuses_function_tool_instance(mock_anthropic_client: MagicMock) -> None: + """Passing a FunctionTool should update and return the same tool instance.""" + client = create_test_anthropic_client(mock_anthropic_client) + + @tool(name="run_shell", approval_mode="never_require") + def run_shell(command: str) -> str: + return command + + shell_tool = client.get_shell_tool( + func=run_shell, + description="Run local bash", + approval_mode="always_require", + ) + + assert shell_tool is run_shell + assert shell_tool.kind == SHELL_TOOL_KIND_VALUE + assert shell_tool.description == "Run local bash" + assert shell_tool.approval_mode == "always_require" + + def test_prepare_tools_for_anthropic_mcp_tool(mock_anthropic_client: MagicMock) -> None: """Test converting MCP dict tool to Anthropic format.""" client = create_test_anthropic_client(mock_anthropic_client) @@ -502,6 +590,62 @@ async def test_prepare_options_with_system_message(mock_anthropic_client: MagicM assert len(run_options["messages"]) == 1 # System message not in messages list +async def test_anthropic_shell_tool_is_invoked_in_function_loop(mock_anthropic_client: MagicMock) -> None: + """Function invocation loop should execute shell tool when Anthropic returns bash tool_use.""" + client = create_test_anthropic_client(mock_anthropic_client) + executed_commands: list[str] = [] + + def run_local_shell(command: str) -> str: + executed_commands.append(command) + return f"executed: {command}" + + shell_tool_instance = client.get_shell_tool(func=run_local_shell, approval_mode="never_require") + + mock_tool_use = MagicMock() + mock_tool_use.type = "tool_use" + 
mock_tool_use.id = "call_bash_loop" + mock_tool_use.name = "bash" + mock_tool_use.input = {"command": "pwd"} + + first_message = MagicMock() + first_message.id = "msg_1" + first_message.content = [mock_tool_use] + first_message.usage = None + first_message.model = "claude-test" + first_message.stop_reason = "tool_use" + + mock_text_block = MagicMock() + mock_text_block.type = "text" + mock_text_block.text = "Done" + + second_message = MagicMock() + second_message.id = "msg_2" + second_message.content = [mock_text_block] + second_message.usage = None + second_message.model = "claude-test" + second_message.stop_reason = "end_turn" + + mock_anthropic_client.beta.messages.create.side_effect = [first_message, second_message] + + await client.get_response( + messages=[Message(role="user", text="Run pwd")], + options={"tools": [shell_tool_instance], "max_tokens": 64}, + ) + + assert executed_commands == ["pwd"] + assert mock_anthropic_client.beta.messages.create.call_count == 2 + second_request_messages = mock_anthropic_client.beta.messages.create.call_args_list[1].kwargs["messages"] + tool_results = [ + block + for message in second_request_messages + for block in message.get("content", []) + if block.get("type") == "tool_result" + ] + assert len(tool_results) == 1 + assert tool_results[0]["tool_use_id"] == "call_bash_loop" + assert "executed: pwd" in tool_results[0]["content"] + + async def test_prepare_options_with_tool_choice_auto(mock_anthropic_client: MagicMock) -> None: """Test _prepare_options with auto tool choice.""" client = create_test_anthropic_client(mock_anthropic_client) @@ -1733,7 +1877,7 @@ def test_parse_code_execution_result_with_files(mock_anthropic_client: MagicMock def test_parse_bash_execution_result_with_stdout(mock_anthropic_client: MagicMock) -> None: - """Test parsing bash execution result with stdout.""" + """Test parsing bash execution result with stdout produces shell_tool_result.""" client = 
create_test_anthropic_client(mock_anthropic_client) client._last_call_id_name = ("call_bash2", "bash_code_execution") @@ -1741,6 +1885,7 @@ def test_parse_bash_execution_result_with_stdout(mock_anthropic_client: MagicMoc mock_content = MagicMock() mock_content.stdout = "Output text" mock_content.stderr = None + mock_content.return_code = 0 mock_content.content = [] mock_block = MagicMock() @@ -1751,11 +1896,18 @@ def test_parse_bash_execution_result_with_stdout(mock_anthropic_client: MagicMoc result = client._parse_contents_from_anthropic([mock_block]) assert len(result) == 1 - assert result[0].type == "function_result" + assert result[0].type == "shell_tool_result" + assert result[0].call_id == "call_bash2" + assert result[0].outputs is not None + assert len(result[0].outputs) == 1 + assert result[0].outputs[0].type == "shell_command_output" + assert result[0].outputs[0].stdout == "Output text" + assert result[0].outputs[0].exit_code == 0 + assert result[0].outputs[0].timed_out is False def test_parse_bash_execution_result_with_stderr(mock_anthropic_client: MagicMock) -> None: - """Test parsing bash execution result with stderr.""" + """Test parsing bash execution result with stderr produces shell_tool_result.""" client = create_test_anthropic_client(mock_anthropic_client) client._last_call_id_name = ("call_bash3", "bash_code_execution") @@ -1763,6 +1915,7 @@ def test_parse_bash_execution_result_with_stderr(mock_anthropic_client: MagicMoc mock_content = MagicMock() mock_content.stdout = None mock_content.stderr = "Error output" + mock_content.return_code = 1 mock_content.content = [] mock_block = MagicMock() @@ -1773,7 +1926,39 @@ def test_parse_bash_execution_result_with_stderr(mock_anthropic_client: MagicMoc result = client._parse_contents_from_anthropic([mock_block]) assert len(result) == 1 - assert result[0].type == "function_result" + assert result[0].type == "shell_tool_result" + assert result[0].call_id == "call_bash3" + assert result[0].outputs is not None 
+ assert result[0].outputs[0].type == "shell_command_output" + assert result[0].outputs[0].stderr == "Error output" + assert result[0].outputs[0].exit_code == 1 + + +def test_parse_bash_execution_result_with_error(mock_anthropic_client: MagicMock) -> None: + """Test parsing bash execution error produces shell_tool_result with error info.""" + from anthropic.types.beta.beta_bash_code_execution_tool_result_error import ( + BetaBashCodeExecutionToolResultError, + ) + + client = create_test_anthropic_client(mock_anthropic_client) + client._last_call_id_name = ("call_bash_err", "bash_code_execution") + + mock_error = MagicMock(spec=BetaBashCodeExecutionToolResultError) + mock_error.error_code = "execution_time_exceeded" + + mock_block = MagicMock() + mock_block.type = "bash_code_execution_tool_result" + mock_block.tool_use_id = "call_bash_err" + mock_block.content = mock_error + + result = client._parse_contents_from_anthropic([mock_block]) + + assert len(result) == 1 + assert result[0].type == "shell_tool_result" + assert result[0].outputs is not None + assert result[0].outputs[0].type == "shell_command_output" + assert result[0].outputs[0].stderr == "execution_time_exceeded" + assert result[0].outputs[0].timed_out is True # Text Editor Result Tests diff --git a/python/packages/core/agent_framework/_agents.py b/python/packages/core/agent_framework/_agents.py index a519796b17..8f477f9223 100644 --- a/python/packages/core/agent_framework/_agents.py +++ b/python/packages/core/agent_framework/_agents.py @@ -947,7 +947,11 @@ def _propagate_conversation_id(update: AgentResponseUpdate) -> AgentResponseUpda def _finalizer(updates: Sequence[AgentResponseUpdate]) -> AgentResponse[Any]: ctx = ctx_holder["ctx"] - rf = ctx.get("chat_options", {}).get("response_format") if ctx else (options.get("response_format") if options else None) + rf = ( + ctx.get("chat_options", {}).get("response_format") + if ctx + else (options.get("response_format") if options else None) + ) return 
self._finalize_response_updates(updates, response_format=rf) return ( diff --git a/python/packages/core/agent_framework/_tools.py b/python/packages/core/agent_framework/_tools.py index 3ec167d4f7..303699572c 100644 --- a/python/packages/core/agent_framework/_tools.py +++ b/python/packages/core/agent_framework/_tools.py @@ -79,6 +79,7 @@ DEFAULT_MAX_ITERATIONS: Final[int] = 40 DEFAULT_MAX_CONSECUTIVE_ERRORS_PER_REQUEST: Final[int] = 3 +SHELL_TOOL_KIND_VALUE: Final[str] = "shell" ChatClientT = TypeVar("ChatClientT", bound="SupportsChatGetResponse[Any]") # region Helpers @@ -237,6 +238,7 @@ def __init__( name: str, description: str = "", approval_mode: Literal["always_require", "never_require"] | None = None, + kind: str | None = None, max_invocations: int | None = None, max_invocation_exceptions: int | None = None, additional_properties: dict[str, Any] | None = None, @@ -252,6 +254,8 @@ def __init__( description: A description of the function. approval_mode: Whether or not approval is required to run this tool. Default is that approval is NOT required (``"never_require"``). + kind: Optional provider-agnostic tool classification + (for example ``"shell"``). max_invocations: The maximum number of times this function can be invoked across the **lifetime of this tool instance**. If None (default), there is no limit. Should be at least 1. 
If the tool is called multiple @@ -296,6 +300,7 @@ def __init__( # Core attributes (formerly from BaseTool) self.name = name self.description = description + self.kind = kind self.additional_properties = additional_properties for key, value in kwargs.items(): setattr(self, key, value) @@ -1077,6 +1082,7 @@ def tool( description: str | None = None, schema: type[BaseModel] | Mapping[str, Any] | None = None, approval_mode: Literal["always_require", "never_require"] | None = None, + kind: str | None = None, max_invocations: int | None = None, max_invocation_exceptions: int | None = None, additional_properties: dict[str, Any] | None = None, @@ -1092,6 +1098,7 @@ def tool( description: str | None = None, schema: type[BaseModel] | Mapping[str, Any] | None = None, approval_mode: Literal["always_require", "never_require"] | None = None, + kind: str | None = None, max_invocations: int | None = None, max_invocation_exceptions: int | None = None, additional_properties: dict[str, Any] | None = None, @@ -1106,6 +1113,7 @@ def tool( description: str | None = None, schema: type[BaseModel] | Mapping[str, Any] | None = None, approval_mode: Literal["always_require", "never_require"] | None = None, + kind: str | None = None, max_invocations: int | None = None, max_invocation_exceptions: int | None = None, additional_properties: dict[str, Any] | None = None, @@ -1145,6 +1153,7 @@ def tool( function's signature. Defaults to ``None`` (infer from signature). approval_mode: Whether or not approval is required to run this tool. Default is that approval is NOT required (``"never_require"``). + kind: Optional provider-agnostic tool classification. max_invocations: The maximum number of times this function can be invoked across the **lifetime of this tool instance**. If None (default), there is no limit. Should be at least 1. 
For per-request limits, use @@ -1245,6 +1254,7 @@ def wrapper(f: Callable[..., Any]) -> FunctionTool: name=tool_name, description=tool_desc, approval_mode=approval_mode, + kind=kind, max_invocations=max_invocations, max_invocation_exceptions=max_invocation_exceptions, additional_properties=additional_properties or {}, @@ -1390,6 +1400,7 @@ async def _auto_invoke_function( call_id=function_call_content.call_id, # type: ignore[arg-type] result=f'Error: Requested function "{function_call_content.name}" not found.', exception=str(exc), # type: ignore[arg-type] + additional_properties=function_call_content.additional_properties, ) else: # Note: Unapproved tools (approved=False) are handled in _replace_approval_contents_with_results @@ -1430,6 +1441,7 @@ async def _auto_invoke_function( call_id=function_call_content.call_id, # type: ignore[arg-type] result=message, exception=str(exc), # type: ignore[arg-type] + additional_properties=function_call_content.additional_properties, ) if middleware_pipeline is None or not middleware_pipeline.has_middlewares: @@ -1443,6 +1455,7 @@ async def _auto_invoke_function( return Content.from_function_result( call_id=function_call_content.call_id, # type: ignore[arg-type] result=function_result, + additional_properties=function_call_content.additional_properties, ) except Exception as exc: message = "Error: Function failed." 
@@ -1452,6 +1465,7 @@ async def _auto_invoke_function( call_id=function_call_content.call_id, # type: ignore[arg-type] result=message, exception=str(exc), + additional_properties=function_call_content.additional_properties, ) # Execute through middleware pipeline if available from ._middleware import FunctionInvocationContext @@ -1477,6 +1491,7 @@ async def final_function_handler(context_obj: Any) -> Any: return Content.from_function_result( call_id=function_call_content.call_id, # type: ignore[arg-type] result=function_result, + additional_properties=function_call_content.additional_properties, ) except MiddlewareTermination as term_exc: # Re-raise to signal loop termination, but first capture any result set by middleware @@ -1485,6 +1500,7 @@ async def final_function_handler(context_obj: Any) -> Any: term_exc.result = Content.from_function_result( call_id=function_call_content.call_id, # type: ignore[arg-type] result=middleware_context.result, + additional_properties=function_call_content.additional_properties, ) raise except Exception as exc: @@ -1495,6 +1511,7 @@ async def final_function_handler(context_obj: Any) -> Any: call_id=function_call_content.call_id, # type: ignore[arg-type] result=message, exception=str(exc), # type: ignore[arg-type] + additional_properties=function_call_content.additional_properties, ) diff --git a/python/packages/core/agent_framework/_types.py b/python/packages/core/agent_framework/_types.py index 3df0bb20fb..beed97834c 100644 --- a/python/packages/core/agent_framework/_types.py +++ b/python/packages/core/agent_framework/_types.py @@ -340,6 +340,9 @@ def _serialize_value(value: Any, exclude_none: bool) -> Any: "image_generation_tool_result", "mcp_server_tool_call", "mcp_server_tool_result", + "shell_tool_call", + "shell_tool_result", + "shell_command_output", "function_approval_request", "function_approval_response", ] @@ -476,6 +479,16 @@ def __init__( outputs: list[Content] | Any | None = None, # Image generation tool fields 
image_id: str | None = None, + # Shell tool fields + commands: list[str] | None = None, + timeout_ms: int | None = None, + max_output_length: int | None = None, + status: str | None = None, + # Shell command output fields + stdout: str | None = None, + stderr: str | None = None, + exit_code: int | None = None, + timed_out: bool | None = None, # MCP server tool fields tool_name: str | None = None, server_name: str | None = None, @@ -518,6 +531,14 @@ def __init__( self.inputs = inputs self.outputs = outputs self.image_id = image_id + self.commands = commands + self.timeout_ms = timeout_ms + self.max_output_length = max_output_length + self.status = status + self.stdout = stdout + self.stderr = stderr + self.exit_code = exit_code + self.timed_out = timed_out self.tool_name = tool_name self.server_name = server_name self.output = output @@ -908,6 +929,112 @@ def from_image_generation_tool_result( raw_representation=raw_representation, ) + @classmethod + def from_shell_tool_call( + cls: type[ContentT], + *, + call_id: str | None = None, + commands: list[str] | None = None, + timeout_ms: int | None = None, + max_output_length: int | None = None, + status: str | None = None, + annotations: Sequence[Annotation] | None = None, + additional_properties: MutableMapping[str, Any] | None = None, + raw_representation: Any = None, + ) -> ContentT: + """Create shell tool call content. + + This content represents the model's request to run one or more shell + commands. It is request metadata, not command output. + + Keyword Args: + call_id: The unique identifier for this tool call. + commands: The list of commands to execute. + timeout_ms: The timeout in milliseconds for the shell command execution. + max_output_length: The maximum output length in characters. + status: The status of the shell call (e.g., "in_progress", "completed", "incomplete"). + annotations: Optional annotations for this content. + additional_properties: Optional additional properties. 
+ raw_representation: The raw provider-specific representation. + """ + return cls( + "shell_tool_call", + call_id=call_id, + commands=commands, + timeout_ms=timeout_ms, + max_output_length=max_output_length, + status=status, + annotations=annotations, + additional_properties=additional_properties, + raw_representation=raw_representation, + ) + + @classmethod + def from_shell_tool_result( + cls: type[ContentT], + *, + call_id: str | None = None, + outputs: Sequence[Content] | None = None, + max_output_length: int | None = None, + annotations: Sequence[Annotation] | None = None, + additional_properties: MutableMapping[str, Any] | None = None, + raw_representation: Any = None, + ) -> ContentT: + """Create shell tool result content. + + This content represents the aggregate result for a shell tool call. + Use :meth:`from_shell_command_output` to build each per-command output + item and pass those objects via ``outputs``. + + Keyword Args: + call_id: The function call ID for which this is the result. + outputs: The list of shell command output Content objects. + max_output_length: The maximum output length in characters. + annotations: Optional annotations for this content. + additional_properties: Optional additional properties. + raw_representation: The raw provider-specific representation. + """ + return cls( + "shell_tool_result", + call_id=call_id, + outputs=list(outputs) if outputs is not None else None, + max_output_length=max_output_length, + annotations=annotations, + additional_properties=additional_properties, + raw_representation=raw_representation, + ) + + @classmethod + def from_shell_command_output( + cls: type[ContentT], + *, + stdout: str | None = None, + stderr: str | None = None, + exit_code: int | None = None, + timed_out: bool | None = None, + additional_properties: MutableMapping[str, Any] | None = None, + raw_representation: Any = None, + ) -> ContentT: + """Create shell command output content for one command execution. 
+ + Keyword Args: + stdout: The standard output of the command. + stderr: The standard error output of the command. + exit_code: The exit code of the command, or None if the command timed out. + timed_out: Whether the command execution timed out. + additional_properties: Optional additional properties. + raw_representation: The raw provider-specific representation. + """ + return cls( + "shell_command_output", + stdout=stdout, + stderr=stderr, + exit_code=exit_code, + timed_out=timed_out, + additional_properties=additional_properties, + raw_representation=raw_representation, + ) + @classmethod def from_mcp_server_tool_call( cls: type[ContentT], @@ -1034,6 +1161,14 @@ def to_dict(self, *, exclude_none: bool = True, exclude: set[str] | None = None) "inputs", "outputs", "image_id", + "commands", + "timeout_ms", + "max_output_length", + "status", + "stdout", + "stderr", + "exit_code", + "timed_out", "tool_name", "server_name", "output", diff --git a/python/packages/core/agent_framework/openai/_assistants_client.py b/python/packages/core/agent_framework/openai/_assistants_client.py index 1c8aafc94e..17b801a36a 100644 --- a/python/packages/core/agent_framework/openai/_assistants_client.py +++ b/python/packages/core/agent_framework/openai/_assistants_client.py @@ -639,9 +639,15 @@ async def _process_stream_events(self, stream: Any, thread_id: str) -> AsyncIter additional_properties=props, raw_representation=completed_annotation, ) - if completed_annotation.file_citation and completed_annotation.file_citation.file_id: + if ( + completed_annotation.file_citation + and completed_annotation.file_citation.file_id + ): ann["file_id"] = completed_annotation.file_citation.file_id - if completed_annotation.start_index is not None and completed_annotation.end_index is not None: + if ( + completed_annotation.start_index is not None + and completed_annotation.end_index is not None + ): ann["annotated_regions"] = [ TextSpanRegion( type="text_span", @@ -660,7 +666,10 @@ async def 
_process_stream_events(self, stream: Any, thread_id: str) -> AsyncIter ) if completed_annotation.file_path and completed_annotation.file_path.file_id: ann["file_id"] = completed_annotation.file_path.file_id - if completed_annotation.start_index is not None and completed_annotation.end_index is not None: + if ( + completed_annotation.start_index is not None + and completed_annotation.end_index is not None + ): ann["annotated_regions"] = [ TextSpanRegion( type="text_span", diff --git a/python/packages/core/agent_framework/openai/_responses_client.py b/python/packages/core/agent_framework/openai/_responses_client.py index 5ba0bbc686..f11b60b767 100644 --- a/python/packages/core/agent_framework/openai/_responses_client.py +++ b/python/packages/core/agent_framework/openai/_responses_client.py @@ -2,7 +2,9 @@ from __future__ import annotations +import json import logging +import shlex import sys from collections.abc import ( AsyncIterable, @@ -17,6 +19,7 @@ from typing import TYPE_CHECKING, Any, ClassVar, Generic, Literal, NoReturn, TypedDict, cast from openai import AsyncOpenAI, BadRequestError +from openai.types.responses import FunctionShellTool from openai.types.responses.file_search_tool_param import FileSearchToolParam from openai.types.responses.function_tool_param import FunctionToolParam from openai.types.responses.parsed_response import ( @@ -40,11 +43,13 @@ from .._middleware import ChatMiddlewareLayer from .._settings import load_settings from .._tools import ( + SHELL_TOOL_KIND_VALUE, FunctionInvocationConfiguration, FunctionInvocationLayer, FunctionTool, ToolTypes, normalize_tools, + tool, ) from .._types import ( Annotation, @@ -92,6 +97,12 @@ ) logger = logging.getLogger("agent_framework.openai") +OPENAI_SHELL_ENVIRONMENT_KEY = "openai.responses.shell.environment" +OPENAI_SHELL_OUTPUT_TYPE_KEY = "openai.responses.shell.output_type" +OPENAI_LOCAL_SHELL_CALL_ITEM_ID_KEY = "openai.responses.local_shell.call_item_id" +OPENAI_LOCAL_SHELL_COMMAND_PARTS_KEY = 
"openai.local_shell_command_parts" +OPENAI_SHELL_OUTPUT_TYPE_SHELL_CALL = "shell_call_output" +OPENAI_SHELL_OUTPUT_TYPE_LOCAL_SHELL_CALL = "local_shell_call_output" class OpenAIContinuationToken(ContinuationToken): @@ -432,7 +443,9 @@ def _prepare_tools_for_openai( ) -> list[Any]: """Prepare tools for the OpenAI Responses API. - Converts FunctionTool to Responses API format. All other tools pass through unchanged. + Converts FunctionTool to Responses API format. Shell-enabled FunctionTools + with explicit shell environment metadata are mapped to OpenAI shell tools. + All other tools pass through unchanged. Args: tools: A single tool or sequence of tools to prepare. @@ -444,24 +457,49 @@ def _prepare_tools_for_openai( if not tools_list: return [] response_tools: list[Any] = [] - for tool in tools_list: - if isinstance(tool, FunctionTool): - params = tool.parameters() + for tool_item in tools_list: + if isinstance(tool_item, FunctionTool) and tool_item.kind == SHELL_TOOL_KIND_VALUE: + shell_env = (tool_item.additional_properties or {}).get(OPENAI_SHELL_ENVIRONMENT_KEY) + if isinstance(shell_env, Mapping): + response_tools.append( + FunctionShellTool( + type="shell", + environment=dict(shell_env), + ) + ) + continue + if isinstance(tool_item, FunctionTool): + params = tool_item.parameters() params["additionalProperties"] = False response_tools.append( FunctionToolParam( - name=tool.name, + name=tool_item.name, parameters=params, strict=False, type="function", - description=tool.description, + description=tool_item.description, ) ) else: # Pass through all other tools (dicts, SDK types) unchanged - response_tools.append(tool) + response_tools.append(tool_item) return response_tools + def _get_local_shell_tool_name( + self, + tools: ToolTypes | Callable[..., Any] | Sequence[ToolTypes | Callable[..., Any]] | None, + ) -> str | None: + """Return the name of the configured local shell tool function, if any.""" + for tool_item in normalize_tools(tools): + if not 
isinstance(tool_item, FunctionTool): + continue + if tool_item.kind != SHELL_TOOL_KIND_VALUE: + continue + shell_env = (tool_item.additional_properties or {}).get(OPENAI_SHELL_ENVIRONMENT_KEY) + if isinstance(shell_env, Mapping) and shell_env.get("type") == "local": + return tool_item.name + return None + # region Hosted Tool Factory Methods @staticmethod @@ -622,6 +660,92 @@ def get_image_generation_tool( return tool + @staticmethod + def get_shell_tool( + *, + func: Callable[..., Any] | FunctionTool | None = None, + environment: Literal["auto"] | dict[str, Any] | None = "auto", + name: str | None = None, + description: str | None = None, + approval_mode: Literal["always_require", "never_require"] | None = None, + ) -> Any: + """Create a shell tool for the Responses API. + + - When ``func`` is ``None`` (default), returns an OpenAI hosted shell + tool declaration. + - When ``func`` is provided, returns a local FunctionTool that is + declared to OpenAI as a local shell tool and executed via the function + invocation layer. + + Keyword Args: + func: Optional local shell function or ``FunctionTool``. + environment: Container environment configuration. + Used only when ``func`` is ``None``. + Use ``"auto"`` (default) for managed containers, or provide a + dict with explicit hosted container settings. + name: Optional local tool name when ``func`` is provided. + description: Optional local tool description when ``func`` is provided. + approval_mode: Optional local tool approval mode. + + Returns: + A hosted shell declaration or a local shell FunctionTool. + + Examples: + .. 
code-block:: python + + from agent_framework.openai import OpenAIResponsesClient + + # Hosted shell (OpenAI container) + tool = OpenAIResponsesClient.get_shell_tool() + + # Hosted shell with custom environment + tool = OpenAIResponsesClient.get_shell_tool( + environment={"type": "container_auto", "file_ids": ["file-abc"]} + ) + + # Local shell execution + tool = OpenAIResponsesClient.get_shell_tool( + func=my_shell_func, + ) + """ + if func is None: + env_config: dict[str, Any] = ( + dict(environment) if isinstance(environment, dict) else {"type": "container_auto"} + ) + if env_config.get("type") == "local": + raise ValueError("Local shell requires func. Provide func for local execution.") + return FunctionShellTool(type="shell", environment=env_config) + + if isinstance(environment, dict): + raise ValueError("When func is provided, environment config is not supported.") + local_env = {"type": "local"} + + base_tool: FunctionTool + if isinstance(func, FunctionTool): + base_tool = func + if name is not None: + base_tool.name = name + if description is not None: + base_tool.description = description + if approval_mode is not None: + base_tool.approval_mode = approval_mode + else: + base_tool = tool( + func=func, + name=name, + description=description, + approval_mode=approval_mode, + ) + + if base_tool.func is None: + raise ValueError("Shell tool requires an executable function.") + + additional_properties = dict(base_tool.additional_properties or {}) + additional_properties[OPENAI_SHELL_ENVIRONMENT_KEY] = local_env + base_tool.additional_properties = additional_properties + base_tool.kind = SHELL_TOOL_KIND_VALUE + return base_tool + @staticmethod def get_mcp_tool( *, @@ -1044,13 +1168,34 @@ def _prepare_content_for_openai( "status": None, } case "function_result": + shell_output_type = ( + content.additional_properties.get(OPENAI_SHELL_OUTPUT_TYPE_KEY) + if content.additional_properties + else None + ) + if shell_output_type == OPENAI_SHELL_OUTPUT_TYPE_SHELL_CALL: + 
return { + "call_id": content.call_id, + "type": OPENAI_SHELL_OUTPUT_TYPE_SHELL_CALL, + "output": self._to_shell_call_output_payload(content), + } + local_shell_call_item_id = ( + content.additional_properties.get(OPENAI_LOCAL_SHELL_CALL_ITEM_ID_KEY) + if content.additional_properties + else None + ) + if shell_output_type == OPENAI_SHELL_OUTPUT_TYPE_LOCAL_SHELL_CALL and local_shell_call_item_id: + return { + "id": local_shell_call_item_id, + "type": OPENAI_SHELL_OUTPUT_TYPE_LOCAL_SHELL_CALL, + "output": self._to_local_shell_output_payload(content), + } # call_id for the result needs to be the same as the call_id for the function call - args: dict[str, Any] = { + return { "call_id": content.call_id, "type": "function_call_output", "output": content.result if content.result is not None else "", } - return args case "function_approval_request": return { "type": "mcp_approval_request", @@ -1076,6 +1221,65 @@ def _prepare_content_for_openai( logger.debug("Unsupported content type passed (type: %s)", content.type) return {} + @staticmethod + def _to_local_shell_output_payload(content: Content) -> str: + """Convert function tool output to the local shell JSON payload format.""" + payload: dict[str, Any] + if isinstance(content.result, Mapping): + payload = dict(content.result) + else: + payload = { + "stdout": "" if content.result is None else str(content.result), + } + if content.exception is not None and "stderr" not in payload: + payload["stderr"] = str(content.exception) + if "exit_code" not in payload: + payload["exit_code"] = 1 if content.exception else 0 + return json.dumps(payload, ensure_ascii=False) + + @staticmethod + def _to_shell_call_output_payload(content: Content) -> list[dict[str, Any]]: + """Convert function tool output to shell_call_output payload format.""" + payload: dict[str, Any] + if isinstance(content.result, Mapping): + payload = dict(content.result) + else: + payload = { + "stdout": "" if content.result is None else str(content.result), + } + 
if content.exception is not None and "stderr" not in payload: + payload["stderr"] = str(content.exception) + + # Pass through native payload shape when tool already returns shell output entries. + direct_output = payload.get("output") + if isinstance(direct_output, list) and all(isinstance(item, Mapping) for item in direct_output): + return [dict(item) for item in direct_output] + + stdout = str(payload.get("stdout", "")) + stderr = str(payload.get("stderr", "")) + timed_out = bool(payload.get("timed_out", False)) + if timed_out: + outcome: dict[str, Any] = {"type": "timeout"} + else: + exit_code_raw = payload.get("exit_code") + try: + exit_code = int(exit_code_raw) if exit_code_raw is not None else (1 if content.exception else 0) + except (TypeError, ValueError): + exit_code = 1 if content.exception else 0 + outcome = {"type": "exit", "exit_code": exit_code} + return [ + { + "stdout": stdout, + "stderr": stderr, + "outcome": outcome, + } + ] + + @staticmethod + def _join_shell_commands(commands: Sequence[str]) -> str: + """Join shell commands into a single executable command string.""" + return "\n".join(command for command in commands if command).strip() + # region Parse methods def _parse_response_from_openai( self, @@ -1087,6 +1291,7 @@ def _parse_response_from_openai( metadata: dict[str, Any] = response.metadata or {} contents: list[Content] = [] + local_shell_tool_name = self._get_local_shell_tool_name(options.get("tools")) for item in response.output: # type: ignore[reportUnknownMemberType] match item.type: # types: @@ -1332,6 +1537,97 @@ def _parse_response_from_openai( raw_representation=item, ) ) + case "shell_call": # ResponseFunctionShellToolCall + shell_call_id = item.call_id if hasattr(item, "call_id") else "" + shell_commands: list[str] = [] + shell_timeout_ms: int | None = None + shell_max_output: int | None = None + if action := getattr(item, "action", None): + shell_commands = list(getattr(action, "commands", []) or []) + shell_timeout_ms = 
getattr(action, "timeout_ms", None) + shell_max_output = getattr(action, "max_output_length", None) + if local_shell_tool_name: + command_text = self._join_shell_commands(shell_commands) + contents.append( + Content.from_function_call( + call_id=shell_call_id, + name=local_shell_tool_name, + arguments=json.dumps({"command": command_text}), + additional_properties={ + OPENAI_SHELL_OUTPUT_TYPE_KEY: OPENAI_SHELL_OUTPUT_TYPE_SHELL_CALL, + OPENAI_LOCAL_SHELL_COMMAND_PARTS_KEY: shell_commands, + }, + raw_representation=item, + ) + ) + else: + contents.append( + Content.from_shell_tool_call( + call_id=shell_call_id, + commands=shell_commands, + timeout_ms=shell_timeout_ms, + max_output_length=shell_max_output, + status=getattr(item, "status", None), + raw_representation=item, + ) + ) + case "local_shell_call": + local_call_id = getattr(item, "call_id", None) or "" + local_command_parts = list(getattr(getattr(item, "action", None), "command", []) or []) + local_command = shlex.join(local_command_parts) if local_command_parts else "" + if local_shell_tool_name: + contents.append( + Content.from_function_call( + call_id=local_call_id, + name=local_shell_tool_name, + arguments=json.dumps({"command": local_command}), + additional_properties={ + OPENAI_SHELL_OUTPUT_TYPE_KEY: OPENAI_SHELL_OUTPUT_TYPE_LOCAL_SHELL_CALL, + OPENAI_LOCAL_SHELL_CALL_ITEM_ID_KEY: getattr(item, "id", None), + OPENAI_LOCAL_SHELL_COMMAND_PARTS_KEY: local_command_parts, + }, + raw_representation=item, + ) + ) + else: + contents.append( + Content.from_shell_tool_call( + call_id=local_call_id, + commands=[local_command] if local_command else [], + timeout_ms=getattr(getattr(item, "action", None), "timeout_ms", None), + status=getattr(item, "status", None), + raw_representation=item, + ) + ) + case "shell_call_output": # ResponseFunctionShellToolCallOutput + shell_output_call_id = item.call_id if hasattr(item, "call_id") else "" + shell_outputs: list[Content] = [] + for shell_out in getattr(item, "output", 
[]) or []: + s_exit_code: int | None = None + s_timed_out: bool | None = None + if outcome := getattr(shell_out, "outcome", None): + if getattr(outcome, "type", None) == "exit": + s_exit_code = getattr(outcome, "exit_code", None) + s_timed_out = False + elif getattr(outcome, "type", None) == "timeout": + s_timed_out = True + shell_outputs.append( + Content.from_shell_command_output( + stdout=getattr(shell_out, "stdout", None), + stderr=getattr(shell_out, "stderr", None), + exit_code=s_exit_code, + timed_out=s_timed_out, + raw_representation=shell_out, + ) + ) + contents.append( + Content.from_shell_tool_result( + call_id=shell_output_call_id, + outputs=shell_outputs, + max_output_length=getattr(item, "max_output_length", None), + raw_representation=item, + ) + ) case _: logger.debug("Unparsed output of type: %s: %s", item.type, item) response_message = Message(role="assistant", contents=contents) @@ -1370,6 +1666,7 @@ def _parse_chunk_from_openai( """Parse an OpenAI Responses API streaming event into a ChatResponseUpdate.""" metadata: dict[str, Any] = {} contents: list[Content] = [] + local_shell_tool_name = self._get_local_shell_tool_name(options.get("tools")) conversation_id: str | None = None response_id: str | None = None continuation_token: OpenAIContinuationToken | None = None @@ -1646,6 +1943,97 @@ def _parse_chunk_from_openai( raw_representation=event_item, ) ) + case "shell_call": # ResponseFunctionShellToolCall + s_call_id = getattr(event_item, "call_id", None) or "" + s_commands: list[str] = [] + s_timeout_ms: int | None = None + s_max_output: int | None = None + if s_action := getattr(event_item, "action", None): + s_commands = list(getattr(s_action, "commands", []) or []) + s_timeout_ms = getattr(s_action, "timeout_ms", None) + s_max_output = getattr(s_action, "max_output_length", None) + if local_shell_tool_name: + command_text = self._join_shell_commands(s_commands) + contents.append( + Content.from_function_call( + call_id=s_call_id, + 
name=local_shell_tool_name, + arguments=json.dumps({"command": command_text}), + additional_properties={ + OPENAI_SHELL_OUTPUT_TYPE_KEY: OPENAI_SHELL_OUTPUT_TYPE_SHELL_CALL, + OPENAI_LOCAL_SHELL_COMMAND_PARTS_KEY: s_commands, + }, + raw_representation=event_item, + ) + ) + else: + contents.append( + Content.from_shell_tool_call( + call_id=s_call_id, + commands=s_commands, + timeout_ms=s_timeout_ms, + max_output_length=s_max_output, + status=getattr(event_item, "status", None), + raw_representation=event_item, + ) + ) + case "local_shell_call": + local_call_id = getattr(event_item, "call_id", None) or "" + local_command_parts = list(getattr(getattr(event_item, "action", None), "command", []) or []) + local_command = shlex.join(local_command_parts) if local_command_parts else "" + if local_shell_tool_name: + contents.append( + Content.from_function_call( + call_id=local_call_id, + name=local_shell_tool_name, + arguments=json.dumps({"command": local_command}), + additional_properties={ + OPENAI_SHELL_OUTPUT_TYPE_KEY: OPENAI_SHELL_OUTPUT_TYPE_LOCAL_SHELL_CALL, + OPENAI_LOCAL_SHELL_CALL_ITEM_ID_KEY: getattr(event_item, "id", None), + OPENAI_LOCAL_SHELL_COMMAND_PARTS_KEY: local_command_parts, + }, + raw_representation=event_item, + ) + ) + else: + contents.append( + Content.from_shell_tool_call( + call_id=local_call_id, + commands=[local_command] if local_command else [], + timeout_ms=getattr(getattr(event_item, "action", None), "timeout_ms", None), + status=getattr(event_item, "status", None), + raw_representation=event_item, + ) + ) + case "shell_call_output": # ResponseFunctionShellToolCallOutput + s_out_call_id = getattr(event_item, "call_id", None) or "" + s_outputs: list[Content] = [] + for s_out in getattr(event_item, "output", []) or []: + s_exit_code: int | None = None + s_timed_out: bool | None = None + if s_outcome := getattr(s_out, "outcome", None): + if getattr(s_outcome, "type", None) == "exit": + s_exit_code = getattr(s_outcome, "exit_code", None) + 
s_timed_out = False + elif getattr(s_outcome, "type", None) == "timeout": + s_timed_out = True + s_outputs.append( + Content.from_shell_command_output( + stdout=getattr(s_out, "stdout", None), + stderr=getattr(s_out, "stderr", None), + exit_code=s_exit_code, + timed_out=s_timed_out, + raw_representation=s_out, + ) + ) + contents.append( + Content.from_shell_tool_result( + call_id=s_out_call_id, + outputs=s_outputs, + max_output_length=getattr(event_item, "max_output_length", None), + raw_representation=event_item, + ) + ) case "reasoning": # ResponseOutputReasoning reasoning_id = getattr(event_item, "id", None) added_reasoning = False diff --git a/python/packages/core/tests/core/test_types.py b/python/packages/core/tests/core/test_types.py index 8a8885b919..c858ff1e3f 100644 --- a/python/packages/core/tests/core/test_types.py +++ b/python/packages/core/tests/core/test_types.py @@ -332,6 +332,120 @@ def test_mcp_server_tool_call_and_result(): assert call2.call_id == "" +# region: Shell tool content + + +def test_shell_tool_call_content_creation(): + call = Content.from_shell_tool_call( + call_id="shell-1", + commands=["ls -la", "pwd"], + timeout_ms=60000, + max_output_length=4096, + status="completed", + ) + + assert call.type == "shell_tool_call" + assert call.call_id == "shell-1" + assert call.commands == ["ls -la", "pwd"] + assert call.timeout_ms == 60000 + assert call.max_output_length == 4096 + assert call.status == "completed" + + +def test_shell_tool_call_content_minimal(): + call = Content.from_shell_tool_call(call_id="shell-2") + + assert call.type == "shell_tool_call" + assert call.call_id == "shell-2" + assert call.commands is None + assert call.timeout_ms is None + assert call.max_output_length is None + assert call.status is None + + +def test_shell_tool_result_content_creation(): + result = Content.from_shell_tool_result( + call_id="shell-1", + outputs=[ + Content.from_shell_command_output(stdout="hello world\n", stderr=None, exit_code=0, 
timed_out=False), + Content.from_shell_command_output(stderr="error msg", exit_code=1, timed_out=False), + ], + max_output_length=4096, + ) + + assert result.type == "shell_tool_result" + assert result.call_id == "shell-1" + assert result.outputs is not None + assert len(result.outputs) == 2 + assert result.outputs[0].type == "shell_command_output" + assert result.outputs[0].stdout == "hello world\n" + assert result.outputs[0].exit_code == 0 + assert result.outputs[0].timed_out is False + assert result.outputs[1].type == "shell_command_output" + assert result.outputs[1].stderr == "error msg" + assert result.outputs[1].exit_code == 1 + assert result.max_output_length == 4096 + + +def test_shell_tool_result_with_timeout(): + result = Content.from_shell_tool_result( + call_id="shell-t", + outputs=[Content.from_shell_command_output(stdout="partial", timed_out=True)], + ) + + assert result.type == "shell_tool_result" + assert result.outputs is not None + assert result.outputs[0].timed_out is True + assert result.outputs[0].exit_code is None + + +def test_shell_command_output_content_creation(): + output = Content.from_shell_command_output( + stdout="hello\n", + stderr="warn\n", + exit_code=0, + timed_out=False, + ) + + assert output.type == "shell_command_output" + assert output.stdout == "hello\n" + assert output.stderr == "warn\n" + assert output.exit_code == 0 + assert output.timed_out is False + + +def test_shell_content_serialization_roundtrip(): + call = Content.from_shell_tool_call( + call_id="shell-r", + commands=["echo hello"], + timeout_ms=30000, + status="completed", + ) + call_dict = call.to_dict() + restored_call = Content.from_dict(call_dict) + assert restored_call.type == "shell_tool_call" + assert restored_call.call_id == "shell-r" + assert restored_call.commands == ["echo hello"] + assert restored_call.timeout_ms == 30000 + assert restored_call.status == "completed" + + result = Content.from_shell_tool_result( + call_id="shell-r", + 
outputs=[Content.from_shell_command_output(stdout="hello\n", exit_code=0, timed_out=False)], + max_output_length=4096, + ) + result_dict = result.to_dict() + restored_result = Content.from_dict(result_dict) + assert restored_result.type == "shell_tool_result" + assert restored_result.call_id == "shell-r" + assert restored_result.outputs is not None + assert len(restored_result.outputs) == 1 + assert restored_result.outputs[0].type == "shell_command_output" + assert restored_result.outputs[0].stdout == "hello\n" + assert restored_result.outputs[0].exit_code == 0 + assert restored_result.max_output_length == 4096 + + # region: HostedVectorStoreContent diff --git a/python/packages/core/tests/openai/test_openai_assistants_client.py b/python/packages/core/tests/openai/test_openai_assistants_client.py index 1ce40eeba0..21f7173ca3 100644 --- a/python/packages/core/tests/openai/test_openai_assistants_client.py +++ b/python/packages/core/tests/openai/test_openai_assistants_client.py @@ -7,19 +7,6 @@ from unittest.mock import AsyncMock, MagicMock, patch import pytest -from agent_framework import ( - Agent, - AgentResponse, - AgentResponseUpdate, - AgentSession, - ChatResponse, - ChatResponseUpdate, - Content, - Message, - SupportsChatGetResponse, - tool, -) -from agent_framework.openai import OpenAIAssistantsClient from openai.types.beta.threads import ( FileCitationAnnotation, FilePathAnnotation, @@ -35,6 +22,20 @@ from openai.types.beta.threads.runs import RunStep from pydantic import Field +from agent_framework import ( + Agent, + AgentResponse, + AgentResponseUpdate, + AgentSession, + ChatResponse, + ChatResponseUpdate, + Content, + Message, + SupportsChatGetResponse, + tool, +) +from agent_framework.openai import OpenAIAssistantsClient + skip_if_openai_integration_tests_disabled = pytest.mark.skipif( os.getenv("OPENAI_API_KEY", "") in ("", "test-dummy-key"), reason="No real OPENAI_API_KEY provided; skipping integration tests.", @@ -1720,8 +1721,6 @@ async def 
test_message_completed_with_file_citation(self, client): assert ann["annotated_regions"][0]["start_index"] == 10 assert ann["annotated_regions"][0]["end_index"] == 24 - - @pytest.mark.asyncio async def test_message_completed_with_file_path(self, client): """Verify file path annotations are extracted from completed messages.""" diff --git a/python/packages/core/tests/openai/test_openai_responses_client.py b/python/packages/core/tests/openai/test_openai_responses_client.py index 7eaae1e776..e049dbd16e 100644 --- a/python/packages/core/tests/openai/test_openai_responses_client.py +++ b/python/packages/core/tests/openai/test_openai_responses_client.py @@ -31,6 +31,7 @@ ChatResponse, ChatResponseUpdate, Content, + FunctionTool, Message, SupportsChatGetResponse, tool, @@ -38,6 +39,7 @@ from agent_framework.exceptions import ChatClientException, ChatClientInvalidRequestException from agent_framework.openai import OpenAIResponsesClient from agent_framework.openai._exceptions import OpenAIContentFilterException +from agent_framework.openai._responses_client import OPENAI_LOCAL_SHELL_CALL_ITEM_ID_KEY skip_if_openai_integration_tests_disabled = pytest.mark.skipif( os.getenv("OPENAI_API_KEY", "") in ("", "test-dummy-key"), @@ -564,6 +566,386 @@ def test_response_content_creation_with_code_interpreter() -> None: assert any(out.type == "uri" for out in result_content.outputs) +def test_get_shell_tool_basic() -> None: + """Test get_shell_tool returns hosted shell config with default auto environment.""" + tool = OpenAIResponsesClient.get_shell_tool() + assert tool.type == "shell" + assert tool.environment.type == "container_auto" + + +def test_get_shell_tool_rejects_local_without_func() -> None: + """Local environment requires a local function executor.""" + with pytest.raises(ValueError, match="Local shell requires func"): + OpenAIResponsesClient.get_shell_tool(environment={"type": "local"}) + + +def test_get_shell_tool_rejects_environment_config_with_func() -> None: + 
"""Environment config is hosted-only and must not be passed with func.""" + + def local_exec(command: str) -> str: + return command + + with pytest.raises(ValueError, match="environment config is not supported"): + OpenAIResponsesClient.get_shell_tool( + func=local_exec, + environment={"type": "container_auto"}, + ) + + +def test_get_shell_tool_local_executor_maps_to_shell_tool() -> None: + """Test local shell FunctionTool maps to OpenAI shell tool declaration.""" + client = OpenAIResponsesClient(model_id="test-model", api_key="test-key") + + def local_exec(command: str) -> str: + return command + + local_shell_tool = OpenAIResponsesClient.get_shell_tool( + func=local_exec, + approval_mode="never_require", + ) + + assert isinstance(local_shell_tool, FunctionTool) + response_tools = client._prepare_tools_for_openai([local_shell_tool]) + assert len(response_tools) == 1 + assert response_tools[0].type == "shell" + assert response_tools[0].environment.type == "local" + + +def test_get_shell_tool_reuses_function_tool_instance() -> None: + """Passing a FunctionTool should update and return the same tool instance.""" + + @tool(name="run_shell", approval_mode="never_require") + def run_shell(command: str) -> str: + return command + + shell_tool = OpenAIResponsesClient.get_shell_tool( + func=run_shell, + description="Run local shell command", + approval_mode="always_require", + ) + + assert shell_tool is run_shell + assert shell_tool.kind == "shell" + assert shell_tool.description == "Run local shell command" + assert shell_tool.approval_mode == "always_require" + assert (shell_tool.additional_properties or {}).get("openai.responses.shell.environment") == {"type": "local"} + + +def test_response_content_creation_with_local_shell_call_maps_to_function_call() -> None: + """Test local_shell_call is translated into function_call for invocation loop.""" + client = OpenAIResponsesClient(model_id="test-model", api_key="test-key") + + def local_exec(command: str) -> str: + return 
command + + local_shell_tool = OpenAIResponsesClient.get_shell_tool(func=local_exec) + + mock_response = MagicMock() + mock_response.output_parsed = None + mock_response.metadata = {} + mock_response.usage = None + mock_response.id = "test-id" + mock_response.model = "test-model" + mock_response.created_at = 1000000000 + mock_response.status = "completed" + mock_response.incomplete = None + + mock_action = MagicMock() + mock_action.command = ["python", "--version"] + mock_action.timeout_ms = 30000 + + mock_local_shell_call = MagicMock() + mock_local_shell_call.type = "local_shell_call" + mock_local_shell_call.id = "local-shell-item-1" + mock_local_shell_call.call_id = "local-shell-call-1" + mock_local_shell_call.action = mock_action + mock_local_shell_call.status = "completed" + + mock_response.output = [mock_local_shell_call] + + response = client._parse_response_from_openai(mock_response, options={"tools": [local_shell_tool]}) # type: ignore[arg-type] + assert len(response.messages[0].contents) == 1 + call_content = response.messages[0].contents[0] + assert call_content.type == "function_call" + assert call_content.call_id == "local-shell-call-1" + assert call_content.name == local_shell_tool.name + assert call_content.parse_arguments() == {"command": "python --version"} + assert call_content.additional_properties[OPENAI_LOCAL_SHELL_CALL_ITEM_ID_KEY] == "local-shell-item-1" + + +@pytest.mark.asyncio +async def test_local_shell_tool_is_invoked_in_function_loop() -> None: + """Test local shell call executes executor and sends local_shell_call_output.""" + client = OpenAIResponsesClient(model_id="test-model", api_key="test-key") + executed_commands: list[str] = [] + + def local_exec(command: str) -> str: + executed_commands.append(command) + return "Python 3.13.0" + + local_shell_tool = OpenAIResponsesClient.get_shell_tool( + func=local_exec, + approval_mode="never_require", + ) + + mock_response1 = MagicMock() + mock_response1.output_parsed = None + 
mock_response1.metadata = {} + mock_response1.usage = None + mock_response1.id = "resp-1" + mock_response1.model = "test-model" + mock_response1.created_at = 1000000000 + mock_response1.status = "completed" + mock_response1.finish_reason = "tool_calls" + mock_response1.incomplete = None + + mock_action = MagicMock() + mock_action.command = ["python", "--version"] + mock_action.timeout_ms = 30000 + + mock_local_shell_call = MagicMock() + mock_local_shell_call.type = "local_shell_call" + mock_local_shell_call.id = "local-shell-item-1" + mock_local_shell_call.call_id = "local-shell-call-1" + mock_local_shell_call.action = mock_action + mock_local_shell_call.status = "completed" + mock_response1.output = [mock_local_shell_call] + + mock_response2 = MagicMock() + mock_response2.output_parsed = None + mock_response2.metadata = {} + mock_response2.usage = None + mock_response2.id = "resp-2" + mock_response2.model = "test-model" + mock_response2.created_at = 1000000001 + mock_response2.status = "completed" + mock_response2.finish_reason = "stop" + mock_response2.incomplete = None + + mock_text_item = MagicMock() + mock_text_item.type = "message" + mock_text_content = MagicMock() + mock_text_content.type = "output_text" + mock_text_content.text = "Python 3.13.0" + mock_text_item.content = [mock_text_content] + mock_response2.output = [mock_text_item] + + with patch.object(client.client.responses, "create", side_effect=[mock_response1, mock_response2]) as mock_create: + await client.get_response( + messages=[Message(role="user", text="What Python version is available?")], + options={"tools": [local_shell_tool]}, + ) + + assert executed_commands == ["python --version"] + assert mock_create.call_count == 2 + second_call_input = mock_create.call_args_list[1].kwargs["input"] + local_shell_outputs = [item for item in second_call_input if item.get("type") == "local_shell_call_output"] + assert len(local_shell_outputs) == 1 + output_payload = 
json.loads(local_shell_outputs[0]["output"])
+    assert output_payload["stdout"] == "Python 3.13.0"
+
+
+@pytest.mark.asyncio
+async def test_shell_call_is_invoked_as_local_shell_function_loop() -> None:
+    """Test shell_call maps to local function invocation and returns shell_call_output."""
+    client = OpenAIResponsesClient(model_id="test-model", api_key="test-key")
+    executed_commands: list[str] = []  # appended to by local_exec; checked after the run
+
+    def local_exec(command: str) -> str:
+        executed_commands.append(command)
+        return "Python 3.13.0"
+
+    local_shell_tool = OpenAIResponsesClient.get_shell_tool(
+        func=local_exec,
+        approval_mode="never_require",
+    )
+
+    mock_response1 = MagicMock()  # first create() result: carries the shell_call item
+    mock_response1.output_parsed = None
+    mock_response1.metadata = {}
+    mock_response1.usage = None
+    mock_response1.id = "resp-1"
+    mock_response1.model = "test-model"
+    mock_response1.created_at = 1000000000
+    mock_response1.status = "completed"
+    mock_response1.finish_reason = "tool_calls"
+    mock_response1.incomplete = None
+
+    mock_action = MagicMock()
+    mock_action.commands = ["python --version"]  # expected to reach local_exec unchanged
+    mock_action.timeout_ms = 30000
+    mock_action.max_output_length = 4096
+
+    mock_shell_call = MagicMock()
+    mock_shell_call.type = "shell_call"
+    mock_shell_call.id = "sh_test_shell_call_1"
+    mock_shell_call.call_id = "shell-call-1"
+    mock_shell_call.action = mock_action
+    mock_shell_call.status = "completed"
+    mock_response1.output = [mock_shell_call]
+
+    mock_response2 = MagicMock()  # second create() result: a plain output_text message
+    mock_response2.output_parsed = None
+    mock_response2.metadata = {}
+    mock_response2.usage = None
+    mock_response2.id = "resp-2"
+    mock_response2.model = "test-model"
+    mock_response2.created_at = 1000000001
+    mock_response2.status = "completed"
+    mock_response2.finish_reason = "stop"
+    mock_response2.incomplete = None
+
+    mock_text_item = MagicMock()
+    mock_text_item.type = "message"
+    mock_text_content = MagicMock()
+    mock_text_content.type = "output_text"
+    mock_text_content.text = "Python 3.13.0"
+    mock_text_item.content = [mock_text_content]
+    mock_response2.output = [mock_text_item]
+
+    with patch.object(client.client.responses, "create", side_effect=[mock_response1, mock_response2]) as mock_create:
+        await client.get_response(  # patched create() returns mock_response1, then mock_response2
+            messages=[Message(role="user", text="What Python version is available?")],
+            options={"tools": [local_shell_tool]},
+        )
+
+    assert executed_commands == ["python --version"]
+    assert mock_create.call_count == 2  # one create() call per mocked response
+    second_call_input = mock_create.call_args_list[1].kwargs["input"]  # input of the 2nd create() call
+    shell_outputs = [item for item in second_call_input if item.get("type") == "shell_call_output"]
+    assert len(shell_outputs) == 1
+    assert shell_outputs[0]["call_id"] == "shell-call-1"
+    assert isinstance(shell_outputs[0]["output"], list)
+    assert shell_outputs[0]["output"][0]["stdout"] == "Python 3.13.0"
+    local_shell_outputs = [item for item in second_call_input if item.get("type") == "local_shell_call_output"]
+    assert len(local_shell_outputs) == 0  # the result must not use the local_shell_call_output type
+
+
+def test_response_content_creation_with_shell_call() -> None:
+    """Test _parse_response_from_openai with shell_call output."""
+    client = OpenAIResponsesClient(model_id="test-model", api_key="test-key")
+
+    mock_response = MagicMock()
+    mock_response.output_parsed = None
+    mock_response.metadata = {}
+    mock_response.usage = None
+    mock_response.id = "test-id"
+    mock_response.model = "test-model"
+    mock_response.created_at = 1000000000
+    mock_response.status = "completed"
+    mock_response.incomplete = None
+
+    mock_action = MagicMock()
+    mock_action.commands = ["ls -la", "pwd"]  # two commands; asserted verbatim below
+    mock_action.timeout_ms = 60000
+    mock_action.max_output_length = 4096
+
+    mock_shell_call = MagicMock()
+    mock_shell_call.type = "shell_call"
+    mock_shell_call.call_id = "shell-call-1"
+    mock_shell_call.action = mock_action
+    mock_shell_call.status = "completed"
+
+    mock_response.output = [mock_shell_call]  # the response holds a single output item
+
+    response = client._parse_response_from_openai(mock_response, options={})  # type: ignore
+
+    assert len(response.messages[0].contents) == 1
+    call_content = response.messages[0].contents[0]
+    assert call_content.type == "shell_tool_call"
+    assert call_content.call_id == "shell-call-1"
+    assert call_content.commands == ["ls -la", "pwd"]
+    assert call_content.timeout_ms == 60000
+    assert call_content.max_output_length == 4096
+    assert call_content.status == "completed"
+
+
+def test_response_content_creation_with_shell_call_output() -> None:
+    """Test _parse_response_from_openai with shell_call_output output."""
+    client = OpenAIResponsesClient(model_id="test-model", api_key="test-key")
+
+    mock_response = MagicMock()
+    mock_response.output_parsed = None
+    mock_response.metadata = {}
+    mock_response.usage = None
+    mock_response.id = "test-id"
+    mock_response.model = "test-model"
+    mock_response.created_at = 1000000000
+    mock_response.status = "completed"
+    mock_response.incomplete = None
+
+    mock_outcome = MagicMock()
+    mock_outcome.type = "exit"  # asserted below as exit_code 0 with timed_out False
+    mock_outcome.exit_code = 0
+
+    mock_output_entry = MagicMock()
+    mock_output_entry.stdout = "hello world\n"
+    mock_output_entry.stderr = ""
+    mock_output_entry.outcome = mock_outcome
+
+    mock_shell_output = MagicMock()
+    mock_shell_output.type = "shell_call_output"
+    mock_shell_output.call_id = "shell-call-1"
+    mock_shell_output.output = [mock_output_entry]
+    mock_shell_output.max_output_length = 4096
+
+    mock_response.output = [mock_shell_output]  # the response holds a single output item
+
+    response = client._parse_response_from_openai(mock_response, options={})  # type: ignore
+
+    assert len(response.messages[0].contents) == 1
+    result_content = response.messages[0].contents[0]
+    assert result_content.type == "shell_tool_result"
+    assert result_content.call_id == "shell-call-1"
+    assert result_content.outputs is not None
+    assert len(result_content.outputs) == 1
+    assert result_content.outputs[0].type == "shell_command_output"
+    assert result_content.outputs[0].stdout == "hello world\n"
+    assert result_content.outputs[0].exit_code == 0
+    assert result_content.outputs[0].timed_out is False
+    assert result_content.max_output_length == 4096
+
+
+def test_response_content_creation_with_shell_call_timeout() -> None:
+    """Test _parse_response_from_openai with shell_call_output that timed out."""
+    client = OpenAIResponsesClient(model_id="test-model", api_key="test-key")
+
+    mock_response = MagicMock()
+    mock_response.output_parsed = None
+    mock_response.metadata = {}
+    mock_response.usage = None
+    mock_response.id = "test-id"
+    mock_response.model = "test-model"
+    mock_response.created_at = 1000000000
+    mock_response.status = "completed"
+    mock_response.incomplete = None
+
+    mock_outcome = MagicMock()
+    mock_outcome.type = "timeout"  # asserted below as timed_out True with exit_code None
+
+    mock_output_entry = MagicMock()
+    mock_output_entry.stdout = "partial output"
+    mock_output_entry.stderr = None
+    mock_output_entry.outcome = mock_outcome
+
+    mock_shell_output = MagicMock()
+    mock_shell_output.type = "shell_call_output"
+    mock_shell_output.call_id = "shell-call-t"
+    mock_shell_output.output = [mock_output_entry]
+    mock_shell_output.max_output_length = None
+
+    mock_response.output = [mock_shell_output]
+
+    response = client._parse_response_from_openai(mock_response, options={})  # type: ignore
+
+    result_content = response.messages[0].contents[0]
+    assert result_content.type == "shell_tool_result"
+    assert result_content.outputs is not None
+    assert result_content.outputs[0].type == "shell_command_output"
+    assert result_content.outputs[0].timed_out is True
+    assert result_content.outputs[0].exit_code is None
+
+
 def test_response_content_creation_with_function_call() -> None:
     """Test _parse_response_from_openai with function call content."""
     client = OpenAIResponsesClient(model_id="test-model", api_key="test-key")
diff --git a/python/packages/core/tests/workflow/test_agent_executor.py b/python/packages/core/tests/workflow/test_agent_executor.py
index db53868ee1..4a850db642 100644
--- a/python/packages/core/tests/workflow/test_agent_executor.py
+++ b/python/packages/core/tests/workflow/test_agent_executor.py
@@
-286,9 +286,7 @@ async def test_agent_executor_run_streaming_with_stream_kwarg_does_not_raise() - @pytest.mark.parametrize("reserved_kwarg", ["session", "stream", "messages"]) -async def test_prepare_agent_run_args_strips_reserved_kwargs( - reserved_kwarg: str, caplog: "LogCaptureFixture" -) -> None: +async def test_prepare_agent_run_args_strips_reserved_kwargs(reserved_kwarg: str, caplog: "LogCaptureFixture") -> None: """_prepare_agent_run_args must remove reserved kwargs and log a warning.""" raw = {reserved_kwarg: "should-be-stripped", "custom_key": "keep-me"} diff --git a/python/packages/core/tests/workflow/test_workflow_kwargs.py b/python/packages/core/tests/workflow/test_workflow_kwargs.py index 379435e124..ce1465effc 100644 --- a/python/packages/core/tests/workflow/test_workflow_kwargs.py +++ b/python/packages/core/tests/workflow/test_workflow_kwargs.py @@ -499,9 +499,7 @@ async def _done() -> AgentResponse: # Continue with responses only — no new kwargs approval = request_events[0] - await workflow.run( - responses={approval.request_id: approval.data.to_function_approval_response(True)} - ) + await workflow.run(responses={approval.request_id: approval.data.to_function_approval_response(True)}) # Both calls should have received the original kwargs assert len(agent.captured_kwargs) == 2 diff --git a/python/samples/02-agents/providers/anthropic/anthropic_with_shell.py b/python/samples/02-agents/providers/anthropic/anthropic_with_shell.py new file mode 100644 index 0000000000..40c6aedc43 --- /dev/null +++ b/python/samples/02-agents/providers/anthropic/anthropic_with_shell.py @@ -0,0 +1,100 @@ +# Copyright (c) Microsoft. All rights reserved. 
+
+import asyncio
+import subprocess
+from typing import Any
+
+from agent_framework import Agent, Message, tool
+from agent_framework.anthropic import AnthropicClient
+from dotenv import load_dotenv
+
+# Load environment variables from .env file
+load_dotenv()
+
+"""
+Anthropic Client with Shell Tool Example
+
+This sample demonstrates using @tool(approval_mode=...) with AnthropicClient
+for executing bash commands locally. The bash tool tells the model it can
+request shell commands, while the actual execution happens on YOUR machine
+via a user-provided function.
+
+SECURITY NOTE: This example executes real commands on your local machine.
+Only enable this when you trust the agent's actions. Consider implementing
+allowlists, sandboxing, or approval workflows for production use.
+"""
+
+
+@tool(approval_mode="always_require")  # NOTE(review): presumably why run_with_approvals sees approval requests
+def run_bash(command: str) -> str:
+    """Execute a bash command using subprocess and return the output."""
+    try:
+        result = subprocess.run(
+            command,
+            shell=True,  # the command string is handed to the system shell as-is
+            capture_output=True,
+            text=True,
+            timeout=30,  # seconds; TimeoutExpired is handled below
+        )
+        parts: list[str] = []
+        if result.stdout:
+            parts.append(result.stdout)
+        if result.stderr:
+            parts.append(f"stderr: {result.stderr}")
+        parts.append(f"exit_code: {result.returncode}")  # always included, even with no output
+        return "\n".join(parts)
+    except subprocess.TimeoutExpired:
+        return "Command timed out after 30 seconds"
+    except Exception as e:  # any other failure is returned as text instead of raised
+        return f"Error executing command: {e}"
+
+
+async def main() -> None:
+    """Example showing how to use the shell tool with AnthropicClient."""
+    print("=== Anthropic Agent with Shell Tool Example ===")
+    print("NOTE: Commands will execute on your local machine.\n")
+
+    client = AnthropicClient()
+    shell = client.get_shell_tool(func=run_bash)  # run_bash is the function that executes commands
+    agent = Agent(
+        client=client,
+        instructions="You are a helpful assistant that can execute bash commands to answer questions.",
+        tools=[shell],
+    )
+
+    query = "Use bash to print 'Hello from Anthropic shell!' and show the current working directory"
+    print(f"User: {query}")
+    result = await run_with_approvals(query, agent)  # agent result, or a str if the user rejected
+    print(f"Result: {result}\n")
+
+
+async def run_with_approvals(query: str, agent: Agent) -> Any:
+    """Run the agent and handle shell approvals outside tool execution."""
+    current_input: str | list[Any] = query
+    while True:
+        result = await agent.run(current_input)
+        if not result.user_input_requests:  # nothing left to approve: return the agent result
+            return result
+
+        next_input: list[Any] = [query]  # original query, followed by request/response message pairs
+        rejected = False
+        for user_input_needed in result.user_input_requests:
+            print(
+                f"\nShell request: {user_input_needed.function_call.name}"
+                f"\nArguments: {user_input_needed.function_call.arguments}"
+            )
+            user_approval = await asyncio.to_thread(input, "\nApprove shell command? (y/n): ")
+            approved = user_approval.strip().lower() == "y"  # anything other than "y" counts as a rejection
+            next_input.append(Message("assistant", [user_input_needed]))
+            next_input.append(Message("user", [user_input_needed.to_function_approval_response(approved)]))
+            if not approved:
+                rejected = True
+                break  # stop prompting after the first rejection
+        if rejected:
+            print("\nShell command rejected. Stopping without additional approval prompts.")
+            return "Shell command execution was rejected by user."
+        current_input = next_input  # run again with the approval responses
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/python/samples/02-agents/providers/openai/openai_responses_client_with_local_shell.py b/python/samples/02-agents/providers/openai/openai_responses_client_with_local_shell.py
new file mode 100644
index 0000000000..b3135702a7
--- /dev/null
+++ b/python/samples/02-agents/providers/openai/openai_responses_client_with_local_shell.py
@@ -0,0 +1,116 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+import asyncio
+import subprocess
+from typing import Any
+
+from agent_framework import Agent, Message, tool
+from agent_framework.openai import OpenAIResponsesClient
+from dotenv import load_dotenv
+
+# Load environment variables from .env file
+load_dotenv()
+
+"""
+OpenAI Responses Client with Local Shell Tool Example
+
+This sample demonstrates implementing a local shell tool using get_shell_tool(func=...)
+that wraps Python's subprocess module. Unlike the hosted shell tool (get_shell_tool()),
+local shell execution runs commands on YOUR machine, not in a remote container.
+
+SECURITY NOTE: This example executes real commands on your local machine.
+Only enable this when you trust the agent's actions. Consider implementing
+allowlists, sandboxing, or approval workflows for production use.
+"""
+
+
+@tool(approval_mode="always_require")  # NOTE(review): presumably why run_with_approvals sees approval requests
+def run_bash(command: str) -> str:
+    """Execute a shell command locally and return stdout, stderr, and exit code."""
+    try:
+        result = subprocess.run(
+            command,
+            shell=True,  # the command string is handed to the system shell as-is
+            capture_output=True,
+            text=True,
+            timeout=30,  # seconds; TimeoutExpired is handled below
+        )
+        parts: list[str] = []
+        if result.stdout:
+            parts.append(result.stdout)
+        if result.stderr:
+            parts.append(f"stderr: {result.stderr}")
+        parts.append(f"exit_code: {result.returncode}")  # always included, even with no output
+        return "\n".join(parts)
+    except subprocess.TimeoutExpired:
+        return "Command timed out after 30 seconds"
+    except Exception as e:  # any other failure is returned as text instead of raised
+        return f"Error executing command: {e}"
+
+
+async def main() -> None:
+    """Example showing how to use a local shell tool with OpenAI."""
+    print("=== OpenAI Agent with Local Shell Tool Example ===")
+    print("NOTE: Commands will execute on your local machine.\n")
+
+    client = OpenAIResponsesClient()
+    local_shell_tool = client.get_shell_tool(
+        func=run_bash,
+    )
+
+    agent = Agent(
+        client=client,
+        instructions="You are a helpful assistant that can run shell commands to help the user.",
+        tools=[local_shell_tool],
+    )
+
+    query = "Use the run_bash tool to execute `python --version` and show only the command output."
+    print(f"User: {query}")
+    result = await run_with_approvals(query, agent)
+    if isinstance(result, str):  # run_with_approvals returns a plain string when the user rejects
+        print(f"Agent: {result}\n")
+        return
+    if result.text:
+        print(f"Agent: {result.text}\n")
+    else:
+        printed = False  # no text in the result: fall back to printing function_result contents
+        for message in result.messages:
+            for content in message.contents:
+                if content.type == "function_result" and content.result:
+                    print(f"Agent (tool output): {content.result}\n")
+                    printed = True
+        if not printed:
+            print("Agent: (no text output returned)\n")
+
+
+async def run_with_approvals(query: str, agent: Agent) -> Any:
+    """Run the agent and handle shell approvals outside tool execution."""
+    current_input: str | list[Any] = query
+
+    while True:
+        result = await agent.run(current_input)
+        if not result.user_input_requests:  # nothing left to approve: return the agent result
+            return result
+
+        next_input: list[Any] = [query]  # original query, followed by request/response message pairs
+        rejected = False
+        for user_input_needed in result.user_input_requests:
+            print(
+                f"\nShell request: {user_input_needed.function_call.name}"
+                f"\nArguments: {user_input_needed.function_call.arguments}"
+            )
+            user_approval = await asyncio.to_thread(input, "\nApprove shell command? (y/n): ")
+            approved = user_approval.strip().lower() == "y"  # anything other than "y" counts as a rejection
+            next_input.append(Message("assistant", [user_input_needed]))
+            next_input.append(Message("user", [user_input_needed.to_function_approval_response(approved)]))
+            if not approved:
+                rejected = True
+                break  # stop prompting after the first rejection
+        if rejected:
+            print("\nShell command rejected. Stopping without additional approval prompts.")
+            return "Shell command execution was rejected by user."
+        current_input = next_input  # run again with the approval responses
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
diff --git a/python/samples/02-agents/providers/openai/openai_responses_client_with_shell.py b/python/samples/02-agents/providers/openai/openai_responses_client_with_shell.py
new file mode 100644
index 0000000000..b86f36fde5
--- /dev/null
+++ b/python/samples/02-agents/providers/openai/openai_responses_client_with_shell.py
@@ -0,0 +1,61 @@
+# Copyright (c) Microsoft. All rights reserved.
+
+import asyncio
+
+from agent_framework import Agent
+from agent_framework.openai import OpenAIResponsesClient
+from dotenv import load_dotenv
+
+# Load environment variables from .env file
+load_dotenv()
+
+"""
+OpenAI Responses Client with Shell Tool Example
+
+This sample demonstrates using get_shell_tool() with OpenAI Responses Client
+for executing shell commands in a managed container environment hosted by OpenAI.
+
+The shell tool allows the model to run commands like listing files, running scripts,
+or performing system operations within a secure, sandboxed container.
+"""
+
+
+async def main() -> None:
+    """Example showing how to use the shell tool with OpenAI Responses."""
+    print("=== OpenAI Responses Agent with Shell Tool Example ===")
+
+    client = OpenAIResponsesClient()
+
+    # Create a hosted shell tool with the default auto container environment
+    shell_tool = client.get_shell_tool()
+
+    agent = Agent(
+        client=client,
+        instructions="You are a helpful assistant that can execute shell commands to answer questions.",
+        tools=shell_tool,  # NOTE(review): passed without a list; confirm Agent accepts a single tool
+    )
+
+    query = "Use a shell command to show the current date and time"
+    print(f"User: {query}")
+    result = await agent.run(query)
+    print(f"Result: {result}\n")
+
+    # Print shell-specific content details
+    for message in result.messages:
+        shell_calls = [c for c in message.contents if c.type == "shell_tool_call"]
+        shell_results = [c for c in message.contents if c.type == "shell_tool_result"]
+
+        if shell_calls:
+            print(f"Shell commands: {shell_calls[0].commands}")  # only the first call of each message
+        if shell_results and shell_results[0].outputs:  # likewise only the first result of each message
+            for output in shell_results[0].outputs:
+                if output.stdout:
+                    print(f"Stdout: {output.stdout}")
+                if output.stderr:
+                    print(f"Stderr: {output.stderr}")
+                if output.exit_code is not None:  # an exit code of 0 is still printed
+                    print(f"Exit code: {output.exit_code}")
+
+
+if __name__ == "__main__":
+    asyncio.run(main())