From 2a3b8d7262e2f79269c8216a1d01f84207876fe3 Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Thu, 13 Feb 2020 22:46:30 -0800 Subject: [PATCH 01/53] allow order objects by position in view, reposition camera, narrower camera view angle for pioneer2dx_noplugin --- .../models/pioneer2dx_noplugin/model.sdf | 6 +- python/social_bot/tasks.py | 100 +++++++++++------- 2 files changed, 66 insertions(+), 40 deletions(-) diff --git a/python/social_bot/models/pioneer2dx_noplugin/model.sdf b/python/social_bot/models/pioneer2dx_noplugin/model.sdf index f58365c1..045ba5c6 100644 --- a/python/social_bot/models/pioneer2dx_noplugin/model.sdf +++ b/python/social_bot/models/pioneer2dx_noplugin/model.sdf @@ -63,7 +63,7 @@ - 0.25 0 0.25 0 0 0 + 0.25 0 0.20 0 0 0 @@ -73,13 +73,13 @@ - 2.0944 + 1.0472 128 128 - 0.1 + 0.001 100 diff --git a/python/social_bot/tasks.py b/python/social_bot/tasks.py index 60f7ee22..8ff4838e 100644 --- a/python/social_bot/tasks.py +++ b/python/social_bot/tasks.py @@ -17,6 +17,7 @@ import math import numpy as np +import operator import os import gin import itertools @@ -169,44 +170,48 @@ class GoalTask(Task): it will get reward -1. """ - def __init__(self, - env, - max_steps, - goal_name="ball", - distraction_list=[ - 'coke_can', 'table', 'car_wheel', 'plastic_cup', 'beer' - ], - success_distance_thresh=0.5, - fail_distance_thresh=2.0, - distraction_penalty_distance_thresh=0, - distraction_penalty=0.5, - random_agent_orientation=False, - sparse_reward=True, - random_range=5.0, - polar_coord=True, - random_goal=False, - use_curriculum_training=False, - curriculum_distractions=True, - curriculum_target_angle=False, - switch_goal_within_episode=False, - start_range=0, - increase_range_by_percent=50., - reward_thresh_to_increase_range=0.4, - percent_full_range_in_curriculum=0.1, - max_reward_q_length=100, - reward_weight=1.0, - move_goal_during_episode=True, - end_episode_after_success=False, - success_with_angle_requirement=True, - additional_observation_list=[], - use_egocentric_states=False, - egocentric_perception_range=0): + def __init__( + self, + env, + max_steps, + goal_name="ball", + distraction_list=[ + 'coke_can', 'table', 'car_wheel', 'plastic_cup', 'beer' + ], + end_on_hitting_distraction=False, + end_episode_after_success=False, + success_distance_thresh=0.5, + fail_distance_thresh=2.0, + distraction_penalty_distance_thresh=0, + distraction_penalty=0.5, + random_agent_orientation=False, + sparse_reward=True, + random_range=5.0, + polar_coord=True, + random_goal=False, + use_curriculum_training=False, + curriculum_distractions=True, + curriculum_target_angle=False, + switch_goal_within_episode=False, + start_range=0, + increase_range_by_percent=50., + reward_thresh_to_increase_range=0.4, + percent_full_range_in_curriculum=0.1, + max_reward_q_length=100, + reward_weight=1.0, + move_goal_during_episode=True, + success_with_angle_requirement=True, + additional_observation_list=[], + use_egocentric_states=False, + egocentric_perception_range=0, + order_obj_by_view=False): """ Args: env (gym.Env): an instance of Environment max_steps (int): episode will end if not reaching gaol in so many steps goal_name (string): name of the goal in the world distraction_list (list of string): a list of model. the model shoud be in gazebo database + end_on_hitting_distraction (bool): whether to end episode on hitting distraction success_distance_thresh (float): the goal is reached if it's within this distance to the agent fail_distance_thresh (float): if the agent moves away from the goal more than this distance, it's considered a failure and is given reward -1 @@ -247,6 +252,8 @@ def __init__(self, egocentric_perception_range (float): the max range in degree to limit the agent's observation. E.g. 60 means object is only visible when it's within +/-60 degrees in front of the agent's direction (yaw). + order_obj_by_view (bool): False to order object coordinates (full states) in object id order; + True to order according to y axis value (left to right) together with object id. """ self._max_play_ground_size = 5 # play ground will be (-5, 5) for both x and y axes. # TODO: Remove the default grey walls in the play ground world file, @@ -255,6 +262,7 @@ def __init__(self, super().__init__( env=env, max_steps=max_steps, reward_weight=reward_weight) self._goal_name = goal_name + self.end_on_hitting_distraction = end_on_hitting_distraction self._success_distance_thresh = success_distance_thresh self._fail_distance_thresh = fail_distance_thresh self._distraction_penalty_distance_thresh = distraction_penalty_distance_thresh @@ -294,6 +302,7 @@ def __init__(self, self._polar_coord = polar_coord self._use_egocentric_states = use_egocentric_states self._egocentric_perception_range = egocentric_perception_range + self._order_obj_by_view = order_obj_by_view if self.should_use_curriculum_training(): self._orig_random_range = random_range self._random_range = start_range @@ -306,8 +315,8 @@ def __init__(self, if curriculum_target_angle: angle_str = ", start_angle {}".format(self._random_angle) logging.info( - "start_range %f%s, reward_thresh_to_increase_range %f", - self._start_range, angle_str, + "Env %d: start_range %f%s, reward_thresh_to_increase_range %f", + self._env._port, self._start_range, angle_str, self._reward_thresh_to_increase_range) else: self._random_range = random_range @@ -328,13 +337,15 @@ def _push_reward_queue(self, value): self._reward_thresh_to_increase_range): if self._curriculum_target_angle: self._random_angle += 20 - logging.info("Raising random_angle to %d", self._random_angle) + logging.info("Env %d: Raising random_angle to %d", + self._env._port, self._random_angle) if (not self._curriculum_target_angle or self._random_angle > 360): self._random_angle = 60 new_range = min((1. + self._increase_range_by_percent) * self._random_range, self._orig_random_range) if self._random_range < self._orig_random_range: - logging.info("Raising random_range to %f", new_range) + logging.info("Env %d: Raising random_range to %f", + self._env._port, new_range) self._random_range = new_range self._q.clear() @@ -417,12 +428,14 @@ def run(self): else: reward = (self._prev_dist - dist) / self._initial_dist reward = reward - distraction_penalty + done = False if distraction_penalty > 0: logging.debug("yielding reward: " + str(reward)) self._push_reward_queue(0) + done = self.end_on_hitting_distraction self._prev_dist = dist agent_sentence = yield TeacherAction( - reward=reward, sentence=self._goal_name) + reward=reward, sentence=self._goal_name, done=done) reward = -1.0 logging.debug("yielding reward: {}, took more than {} steps".format( str(reward), str(self._max_steps))) @@ -597,6 +610,7 @@ def task_specific_observation(self, agent): else: pose = np.concatenate((pose, obj_pos), axis=0) + obj_array = [] agent_pose = np.array(agent.get_pose()).flatten() if self._use_egocentric_states: yaw = agent_pose[5] @@ -604,6 +618,12 @@ def task_specific_observation(self, agent): vx, vy, vz, a1, a2, a3 = np.array(agent.get_velocities()).flatten() rvx, rvy = agent.get_egocentric_cord_2d(vx, vy, yaw) obs = [rvx, rvy, vz, a1, a2, a3] + if self._egocentric_perception_range > 0: + obj_array.extend([[0, rvx, rvy, vz], [1, a1, a2, a3]]) + i = 2 + else: + obj_array.extend([[0, rvx, rvy], [1, vz, a1], [2, a2, a3]]) + i = 3 # adds objects' (goal's as well as distractions') egocentric # coordinates to observation while len(pose) > 1: @@ -621,8 +641,10 @@ def task_specific_observation(self, agent): rotated_x = 0. rotated_y = 0. magnitude = 0. + obj_array.append([i, rotated_x, rotated_y, magnitude]) obs.extend([rotated_x, rotated_y, magnitude]) else: + obj_array.append([i, rotated_x, rotated_y]) obs.extend([rotated_x, rotated_y]) pose = pose[3:] obs = np.array(obs) @@ -631,6 +653,10 @@ def task_specific_observation(self, agent): joints_states = agent.get_internal_states() obs = np.concatenate((pose, agent_pose, agent_vel, joints_states), axis=0) + + if self._order_obj_by_view: + obj_array.sort(key=operator.itemgetter(2)) + obs = np.array(obj_array).flatten() return obs From f37876ad9597c30f6e513406b8c50eec017d332f Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Fri, 14 Feb 2020 15:21:06 -0800 Subject: [PATCH 02/53] field of view back to 120 degrees --- python/social_bot/models/pioneer2dx_noplugin/model.sdf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/social_bot/models/pioneer2dx_noplugin/model.sdf b/python/social_bot/models/pioneer2dx_noplugin/model.sdf index 045ba5c6..9da8cc9a 100644 --- a/python/social_bot/models/pioneer2dx_noplugin/model.sdf +++ b/python/social_bot/models/pioneer2dx_noplugin/model.sdf @@ -73,7 +73,7 @@ - 1.0472 + 2.0944 128 128 From 781e6c2633ac690f4323dd843408f0e2bcd4fecf Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Thu, 14 May 2020 14:24:16 -0700 Subject: [PATCH 03/53] add agent's absolute position and pose into observation --- python/social_bot/tasks.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/python/social_bot/tasks.py b/python/social_bot/tasks.py index 8ff4838e..32df1082 100644 --- a/python/social_bot/tasks.py +++ b/python/social_bot/tasks.py @@ -617,13 +617,19 @@ def task_specific_observation(self, agent): # adds egocentric velocity input vx, vy, vz, a1, a2, a3 = np.array(agent.get_velocities()).flatten() rvx, rvy = agent.get_egocentric_cord_2d(vx, vy, yaw) - obs = [rvx, rvy, vz, a1, a2, a3] + obs = [rvx, rvy, vz, a1, a2, a3] + list(agent_pose) if self._egocentric_perception_range > 0: - obj_array.extend([[0, rvx, rvy, vz], [1, a1, a2, a3]]) - i = 2 + obj_array.extend( + [[0, rvx, rvy, vz], [1, a1, a2, a3], + [2, agent_pose[0], agent_pose[1], agent_pose[2]], + [3, agent_pose[3], agent_pose[4], agent_pose[5]]]) + i = 4 else: - obj_array.extend([[0, rvx, rvy], [1, vz, a1], [2, a2, a3]]) - i = 3 + obj_array.extend([[0, rvx, rvy], [1, vz, a1], [2, a2, a3], + [3, agent_pose[0], agent_pose[1]], + [4, agent_pose[2], agent_pose[3]], + [5, agent_pose[4], agent_pose[5]]]) + i = 6 # adds objects' (goal's as well as distractions') egocentric # coordinates to observation while len(pose) > 1: From ab15b68cc624b2b0b79a1b346e2cc40cd854d8cd Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Mon, 27 Jul 2020 23:13:36 -0700 Subject: [PATCH 04/53] non-egocentric full state observation for GoalTask --- python/social_bot/tasks.py | 35 ++++++++++++++++++++++++++++++----- 1 file changed, 30 insertions(+), 5 deletions(-) diff --git a/python/social_bot/tasks.py b/python/social_bot/tasks.py index 32df1082..bf99e976 100644 --- a/python/social_bot/tasks.py +++ b/python/social_bot/tasks.py @@ -247,6 +247,7 @@ def __init__( flag will overwrite the effects of flags ``switch_goal_within_episode`` and ``move_goal_during_episode``. success_with_angle_requirement: if True then calculate the reward considering the angular requirement additional_observation_list: a list of additonal objects to be added + use_full_states (bool): For non-image observation, use full states of the world use_egocentric_states (bool): For the non-image observation case, use the states transformed to egocentric coordinate, e.g., agent's egocentric distance and direction to goal egocentric_perception_range (float): the max range in degree to limit the agent's observation. @@ -300,6 +301,7 @@ def __init__( self._max_play_ground_size))) self._pos_list.remove((0, 0)) self._polar_coord = polar_coord + self._use_full_states = use_full_states self._use_egocentric_states = use_egocentric_states self._egocentric_perception_range = egocentric_perception_range self._order_obj_by_view = order_obj_by_view @@ -320,6 +322,19 @@ def __init__( self._reward_thresh_to_increase_range) else: self._random_range = random_range + obs_format = "image" + if use_full_states or use_egocentric_states: + obs_format = "full_state" + obs_relative = "ego" + if use_full_states and not use_egocentric_states: + obs_relative = "absolute" + logging.info("Observations: {}, {}.".format(obs_format, obs_relative)) + if use_full_states and not use_egocentric_states: + if not order_obj_by_view: + logging.info( + "Dims: 0-5: agent's velocity and angular " + + "velocity, 6-11: agent's position and pose, 12-13: goal x, y" + + ", all distractions' x, y coordinates.") self.task_vocab += self._object_list self._env.insert_model_list(self._object_list) @@ -612,11 +627,14 @@ def task_specific_observation(self, agent): obj_array = [] agent_pose = np.array(agent.get_pose()).flatten() - if self._use_egocentric_states: + if self._use_full_states or self._use_egocentric_states: yaw = agent_pose[5] # adds egocentric velocity input vx, vy, vz, a1, a2, a3 = np.array(agent.get_velocities()).flatten() - rvx, rvy = agent.get_egocentric_cord_2d(vx, vy, yaw) + if self._use_egocentric_states: + rvx, rvy = agent.get_egocentric_cord_2d(vx, vy, yaw) + else: + rvx, rvy = vx, vy obs = [rvx, rvy, vz, a1, a2, a3] + list(agent_pose) if self._egocentric_perception_range > 0: obj_array.extend( @@ -633,9 +651,15 @@ def task_specific_observation(self, agent): # adds objects' (goal's as well as distractions') egocentric # coordinates to observation while len(pose) > 1: - x = pose[0] - agent_pose[0] - y = pose[1] - agent_pose[1] - rotated_x, rotated_y = agent.get_egocentric_cord_2d(x, y, yaw) + x = pose[0] + y = pose[1] + if self._use_egocentric_states: + x = pose[0] - agent_pose[0] + y = pose[1] - agent_pose[1] + rotated_x, rotated_y = agent.get_egocentric_cord_2d( + x, y, yaw) + else: + rotated_x, rotated_y = x, y if self._egocentric_perception_range > 0: dist = math.sqrt(rotated_x * rotated_x + rotated_y * rotated_y) @@ -661,6 +685,7 @@ def task_specific_observation(self, agent): axis=0) if self._order_obj_by_view: + # TODO: skip first few dims corresponding to rvx, a1 etc.. obj_array.sort(key=operator.itemgetter(2)) obs = np.array(obj_array).flatten() return obs From 4b1dfd12ae70f81dc26105a33dff24c472c118c8 Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Wed, 29 Jul 2020 21:50:37 -0700 Subject: [PATCH 05/53] goal conditioned GoalTask and PlayGround env, with -1/0 reward --- python/social_bot/envs/play_ground.py | 31 +++++++++++++++++++++++---- python/social_bot/tasks.py | 12 +++++++++-- 2 files changed, 37 insertions(+), 6 deletions(-) diff --git a/python/social_bot/envs/play_ground.py b/python/social_bot/envs/play_ground.py index 54a7453e..4cee7fdd 100644 --- a/python/social_bot/envs/play_ground.py +++ b/python/social_bot/envs/play_ground.py @@ -14,6 +14,7 @@ """ A simple enviroment for an agent playing on the ground """ +from collections import OrderedDict import os import time import math @@ -71,6 +72,7 @@ def __init__(self, agent_type='pioneer2dx_noplugin', world_name="play_ground.world", tasks=[GoalTask], + goal_conditioned=False, with_language=False, with_agent_language=False, use_image_observation=False, @@ -93,6 +95,7 @@ def __init__(self, world_name (string): Select the world file, e.g., empty.world, play_ground.world, grocery_ground.world tasks (list): a list of teacher.Task, e.g., GoalTask, KickingBallTask + goal_conditioned (bool): Turn on goal conditioned tasks. with_language (bool): The observation will be a dict with an extra sentence with_agent_language (bool): Include agent sentence in action space. use_image_observation (bool): Use image, or use low-dimentional states as @@ -127,6 +130,7 @@ def __init__(self, """ self._action_cost = action_cost + self._goal_conditioned = goal_conditioned self._with_language = with_language self._seq_length = vocab_sequence_length self._with_agent_language = with_language and with_agent_language @@ -186,6 +190,27 @@ def __init__(self, self.reset() self.observation_space = self._agent.get_observation_space( self._teacher) + if self._goal_conditioned: + assert not with_language + assert not use_image_observation + assert not image_with_internal_states + # state observation + goal_space = gym.spaces.Box( + low=-np.inf, high=np.inf, shape=(2, ), dtype=np.float32) + ag_space = goal_space + self.observation_space = gym.spaces.Dict( + observation=self.observation_space, + desired_goal=goal_space, + achieved_goal=ag_space) + + def _get_observation(self, teacher_sentence): + obs = self._agent.get_observation(self._teacher, teacher_sentence) + if self._goal_conditioned: + orig = obs + obs = OrderedDict(observation=obs) + obs['achieved_goal'] = orig[6:8] + obs['desired_goal'] = orig[12:14] + return obs def reset(self): """ @@ -202,8 +227,7 @@ def reset(self): # The first call of "teach() after "done" will reset the task teacher_action = self._teacher.teach("") self._world.step(self._sub_steps) - obs = self._agent.get_observation(self._teacher, - teacher_action.sentence) + obs = self._get_observation(teacher_action.sentence) return obs def step(self, action): @@ -229,8 +253,7 @@ def step(self, action): self._agent.take_action(controls) self._world.step(self._sub_steps) teacher_action = self._teacher.teach(sentence) - obs = self._agent.get_observation(self._teacher, - teacher_action.sentence) + obs = self._get_observation(teacher_action.sentence) self._steps_in_this_episode += 1 ctrl_cost = np.sum(np.square(controls)) / controls.shape[0] reward = teacher_action.reward - self._action_cost * ctrl_cost diff --git a/python/social_bot/tasks.py b/python/social_bot/tasks.py index bf99e976..c44f51dd 100644 --- a/python/social_bot/tasks.py +++ b/python/social_bot/tasks.py @@ -178,6 +178,7 @@ def __init__( distraction_list=[ 'coke_can', 'table', 'car_wheel', 'plastic_cup', 'beer' ], + goal_conditioned=False, end_on_hitting_distraction=False, end_episode_after_success=False, success_distance_thresh=0.5, @@ -211,6 +212,8 @@ def __init__( max_steps (int): episode will end if not reaching gaol in so many steps goal_name (string): name of the goal in the world distraction_list (list of string): a list of model. the model shoud be in gazebo database + end_episode_after_success (bool): if True, the episode will end once the goal is reached. A True value of this + flag will overwrite the effects of flags ``switch_goal_within_episode`` and ``move_goal_during_episode``. end_on_hitting_distraction (bool): whether to end episode on hitting distraction success_distance_thresh (float): the goal is reached if it's within this distance to the agent fail_distance_thresh (float): if the agent moves away from the goal more than this distance, @@ -243,8 +246,6 @@ def __init__( max_reward_q_length (int): how many recent rewards to consider when estimating agent accuracy. reward_weight (float): the weight of the reward, is used in multi-task case move_goal_during_episode (bool): if True, the goal will be moved during episode, when it has been achieved - end_episode_after_success (bool): if True, the episode will end once the goal is reached. A True value of this - flag will overwrite the effects of flags ``switch_goal_within_episode`` and ``move_goal_during_episode``. success_with_angle_requirement: if True then calculate the reward considering the angular requirement additional_observation_list: a list of additonal objects to be added use_full_states (bool): For non-image observation, use full states of the world @@ -263,6 +264,7 @@ def __init__( super().__init__( env=env, max_steps=max_steps, reward_weight=reward_weight) self._goal_name = goal_name + self._goal_conditioned = goal_conditioned self.end_on_hitting_distraction = end_on_hitting_distraction self._success_distance_thresh = success_distance_thresh self._fail_distance_thresh = fail_distance_thresh @@ -411,11 +413,15 @@ def run(self): self._get_distraction_penalty(loc, dot, prev_min_dist_to_distraction)) + # TODO(Le): compare achieved goal with desired goal if task is + # goal conditioned? if dist < self._success_distance_thresh and ( not self._success_with_angle_requirement or dot > 0.707): # within 45 degrees of the agent direction reward = 1.0 - distraction_penalty self._push_reward_queue(max(reward, 0)) + if self._goal_conditioned: + reward -= 1. logging.debug("yielding reward: " + str(reward)) agent_sentence = yield TeacherAction( reward=reward, sentence="well done", @@ -440,6 +446,8 @@ def run(self): else: if self._sparse_reward: reward = 0 + if self._goal_conditioned: + reward = -1 else: reward = (self._prev_dist - dist) / self._initial_dist reward = reward - distraction_penalty From fd43286da83464c0e420191013b5f8f396f25e8f Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Wed, 29 Jul 2020 23:33:51 -0700 Subject: [PATCH 06/53] simplify --- python/social_bot/envs/play_ground.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/python/social_bot/envs/play_ground.py b/python/social_bot/envs/play_ground.py index 4cee7fdd..d1b4a23d 100644 --- a/python/social_bot/envs/play_ground.py +++ b/python/social_bot/envs/play_ground.py @@ -197,19 +197,18 @@ def __init__(self, # state observation goal_space = gym.spaces.Box( low=-np.inf, high=np.inf, shape=(2, ), dtype=np.float32) - ag_space = goal_space self.observation_space = gym.spaces.Dict( observation=self.observation_space, desired_goal=goal_space, - achieved_goal=ag_space) + achieved_goal=goal_space) def _get_observation(self, teacher_sentence): obs = self._agent.get_observation(self._teacher, teacher_sentence) if self._goal_conditioned: - orig = obs + flat_obs = obs obs = OrderedDict(observation=obs) - obs['achieved_goal'] = orig[6:8] - obs['desired_goal'] = orig[12:14] + obs['achieved_goal'] = flat_obs[6:8] + obs['desired_goal'] = flat_obs[12:14] return obs def reset(self): From e2c6130cf1f0006399a8e0f478be3b2695c36a12 Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Wed, 29 Jul 2020 23:55:23 -0700 Subject: [PATCH 07/53] fix after rebase --- python/social_bot/tasks.py | 86 ++++++++++++++++++++------------------ 1 file changed, 45 insertions(+), 41 deletions(-) diff --git a/python/social_bot/tasks.py b/python/social_bot/tasks.py index c44f51dd..d9b6c49f 100644 --- a/python/social_bot/tasks.py +++ b/python/social_bot/tasks.py @@ -170,42 +170,42 @@ class GoalTask(Task): it will get reward -1. """ - def __init__( - self, - env, - max_steps, - goal_name="ball", - distraction_list=[ - 'coke_can', 'table', 'car_wheel', 'plastic_cup', 'beer' - ], - goal_conditioned=False, - end_on_hitting_distraction=False, - end_episode_after_success=False, - success_distance_thresh=0.5, - fail_distance_thresh=2.0, - distraction_penalty_distance_thresh=0, - distraction_penalty=0.5, - random_agent_orientation=False, - sparse_reward=True, - random_range=5.0, - polar_coord=True, - random_goal=False, - use_curriculum_training=False, - curriculum_distractions=True, - curriculum_target_angle=False, - switch_goal_within_episode=False, - start_range=0, - increase_range_by_percent=50., - reward_thresh_to_increase_range=0.4, - percent_full_range_in_curriculum=0.1, - max_reward_q_length=100, - reward_weight=1.0, - move_goal_during_episode=True, - success_with_angle_requirement=True, - additional_observation_list=[], - use_egocentric_states=False, - egocentric_perception_range=0, - order_obj_by_view=False): + def __init__(self, + env, + max_steps, + goal_name="ball", + distraction_list=[ + 'coke_can', 'table', 'car_wheel', 'plastic_cup', 'beer' + ], + goal_conditioned=False, + end_on_hitting_distraction=False, + end_episode_after_success=False, + success_distance_thresh=0.5, + fail_distance_thresh=2.0, + distraction_penalty_distance_thresh=0, + distraction_penalty=0.5, + random_agent_orientation=False, + sparse_reward=True, + random_range=5.0, + polar_coord=True, + random_goal=False, + use_curriculum_training=False, + curriculum_distractions=True, + curriculum_target_angle=False, + switch_goal_within_episode=False, + start_range=0, + increase_range_by_percent=50., + reward_thresh_to_increase_range=0.4, + percent_full_range_in_curriculum=0.1, + max_reward_q_length=100, + reward_weight=1.0, + move_goal_during_episode=True, + success_with_angle_requirement=True, + additional_observation_list=[], + use_full_states=False, + use_egocentric_states=False, + egocentric_perception_range=0, + order_obj_by_view=False): """ Args: env (gym.Env): an instance of Environment @@ -424,7 +424,8 @@ def run(self): reward -= 1. logging.debug("yielding reward: " + str(reward)) agent_sentence = yield TeacherAction( - reward=reward, sentence="well done", + reward=reward, + sentence="well done", done=self._end_episode_after_success, success=True) steps_since_last_reward = 0 @@ -975,15 +976,16 @@ def run(self): else: reward = 100. agent_sentence = yield TeacherAction( - reward=reward, sentence="well done", done=True, + reward=reward, + sentence="well done", + done=True, success=True) else: if self._sparse_reward: reward = 0. else: reward = self._target_speed + 3 - dist / init_goal_dist - agent_sentence = yield TeacherAction( - reward=reward) + agent_sentence = yield TeacherAction(reward=reward) yield TeacherAction(reward=-1.0, sentence="failed", done=True) def task_specific_observation(self, agent): @@ -1204,7 +1206,9 @@ def run(self): logging.debug("object has been successfuly placed") reward = 200.0 if self._reward_shaping else 1.0 agent_sentence = yield TeacherAction( - reward=reward, sentence="well done", done=True, + reward=reward, + sentence="well done", + done=True, success=True) else: shaped_reward = max( From 5aa122e5aea2adcae1f39fe957813905e33d33d9 Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Fri, 31 Jul 2020 23:37:46 -0700 Subject: [PATCH 08/53] plot goal range in tensorboard --- python/social_bot/envs/play_ground.py | 8 +++++++- python/social_bot/tasks.py | 26 ++++++++++++++++++++------ python/social_bot/teacher.py | 25 ++++++++++++++++++++----- 3 files changed, 47 insertions(+), 12 deletions(-) diff --git a/python/social_bot/envs/play_ground.py b/python/social_bot/envs/play_ground.py index d1b4a23d..b4a9a90a 100644 --- a/python/social_bot/envs/play_ground.py +++ b/python/social_bot/envs/play_ground.py @@ -174,7 +174,10 @@ def __init__(self, # Setup teacher and tasks self._teacher = teacher.Teacher(task_groups_exclusive=False) + self._has_goal_task = False for task in tasks: + if task == GoalTask: + self._has_goal_task = True task_group = TaskGroup() task_group.add_task(task(env=self, max_steps=max_steps)) self._teacher.add_task_group(task_group) @@ -260,7 +263,10 @@ def step(self, action): if teacher_action.done: logging.debug("episode ends at cum reward:" + str(self._cum_reward)) - return obs, reward, teacher_action.done, {"is_success": teacher_action.success} + info = {"is_success": teacher_action.success} + if self._has_goal_task: + info["goal_range"] = teacher_action.goal_range + return obs, reward, teacher_action.done, info def get_step_time(self): """ diff --git a/python/social_bot/tasks.py b/python/social_bot/tasks.py index d9b6c49f..4791333a 100644 --- a/python/social_bot/tasks.py +++ b/python/social_bot/tasks.py @@ -193,7 +193,7 @@ def __init__(self, curriculum_distractions=True, curriculum_target_angle=False, switch_goal_within_episode=False, - start_range=0, + start_range=0.0, increase_range_by_percent=50., reward_thresh_to_increase_range=0.4, percent_full_range_in_curriculum=0.1, @@ -423,11 +423,14 @@ def run(self): if self._goal_conditioned: reward -= 1. logging.debug("yielding reward: " + str(reward)) + done = self._end_episode_after_success + goal_range = self._random_range if done else 0 agent_sentence = yield TeacherAction( reward=reward, sentence="well done", - done=self._end_episode_after_success, - success=True) + done=done, + success=True, + goal_range=goal_range) steps_since_last_reward = 0 if self._switch_goal_within_episode: self.pick_goal() @@ -443,7 +446,10 @@ def run(self): "yielding reward: {}, farther than {} from goal".format( str(reward), str(self._fail_distance_thresh))) yield TeacherAction( - reward=reward, sentence="failed", done=True) + reward=reward, + sentence="failed", + done=True, + goal_range=self._random_range) else: if self._sparse_reward: reward = 0 @@ -458,8 +464,12 @@ def run(self): self._push_reward_queue(0) done = self.end_on_hitting_distraction self._prev_dist = dist + goal_range = self._random_range if done else 0 agent_sentence = yield TeacherAction( - reward=reward, sentence=self._goal_name, done=done) + reward=reward, + sentence=self._goal_name, + done=done, + goal_range=goal_range) reward = -1.0 logging.debug("yielding reward: {}, took more than {} steps".format( str(reward), str(self._max_steps))) @@ -467,7 +477,11 @@ def run(self): if self.should_use_curriculum_training(): logging.debug("reward queue len: {}, sum: {}".format( str(len(self._q)), str(sum(self._q)))) - yield TeacherAction(reward=reward, sentence="failed", done=True) + yield TeacherAction( + reward=reward, + sentence="failed", + done=True, + goal_range=self._random_range) def _get_distraction_penalty(self, agent_loc, dot, prev_min_dist_to_distraction): diff --git a/python/social_bot/teacher.py b/python/social_bot/teacher.py index e9f4ea40..7c84dd9b 100644 --- a/python/social_bot/teacher.py +++ b/python/social_bot/teacher.py @@ -36,8 +36,13 @@ def __init__(self, vocab_size, max_length): class TeacherAction(object): - def __init__(self, reward=0.0, sentence="", done=False, is_idle=False, - success=False): + def __init__(self, + reward=0.0, + sentence="", + done=False, + is_idle=False, + success=False, + goal_range=0.0): """ Args: done: end of an episode if true @@ -48,6 +53,7 @@ def __init__(self, reward=0.0, sentence="", done=False, is_idle=False, self.done = done self.is_idle = is_idle self.success = success + self.goal_range = float(goal_range) class TaskGroup(object): @@ -86,6 +92,7 @@ def teach(self, agent_sentence): TeacherAction """ task = self._get_current_task() + teacher_action = None try: # teacher_action is the value yielded in task teacher_action = task.send(agent_sentence) @@ -98,7 +105,8 @@ def teach(self, agent_sentence): task.close() self._current_task = None self._is_idle = True - teacher_action = TeacherAction() + teacher_action = TeacherAction( + goal_range=teacher_action.goal_range if teacher_action else 0) return teacher_action @@ -334,6 +342,7 @@ def teach(self, agent_sentence): done = False active_group_id = -1 success = False + goal_range = 0 # run all groups in parallel for i, g in enumerate(self._task_groups): teacher_action = g.teach(agent_sentence) @@ -341,6 +350,8 @@ def teach(self, agent_sentence): done = True if teacher_action.success: success = True + if teacher_action.goal_range > 0: + goal_range = teacher_action.goal_range weight = g.get_current_reward_weight() final_reward += weight * teacher_action.reward if not final_sentence: @@ -349,6 +360,10 @@ def teach(self, agent_sentence): if active_group_id != -1: g = self._task_groups.pop(active_group_id) self._task_groups.insert(0, g) - return_action = TeacherAction(final_reward, final_sentence, done, - success=success) + return_action = TeacherAction( + final_reward, + final_sentence, + done, + success=success, + goal_range=goal_range) return return_action From 341460c26b24e3ccc78fd1be26fcedcefe62edef Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Mon, 3 Aug 2020 12:00:03 -0700 Subject: [PATCH 09/53] add comments for goal conditioned task --- python/social_bot/envs/play_ground.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/python/social_bot/envs/play_ground.py b/python/social_bot/envs/play_ground.py index b4a9a90a..0a65a0a5 100644 --- a/python/social_bot/envs/play_ground.py +++ b/python/social_bot/envs/play_ground.py @@ -95,7 +95,12 @@ def __init__(self, world_name (string): Select the world file, e.g., empty.world, play_ground.world, grocery_ground.world tasks (list): a list of teacher.Task, e.g., GoalTask, KickingBallTask - goal_conditioned (bool): Turn on goal conditioned tasks. + goal_conditioned (bool): Turn on goal conditioned tasks. Currently only GoalTask + with full state observation has this mode enabled. The observation will + become an OrderedDict including ``achieved_goal``, ``desired_goal`` and + ``observation`` fields, instead of a flat np array. ``achieved_goal`` will + contain agent's current 2-d (x, y) position and ``desired_goal`` will be + the goal object's 2-d position. Reward and task termination remain unchanged. with_language (bool): The observation will be a dict with an extra sentence with_agent_language (bool): Include agent sentence in action space. use_image_observation (bool): Use image, or use low-dimentional states as From 36c369075421c485bced5b896fe72d4f33add22b Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Wed, 12 Aug 2020 22:16:59 -0700 Subject: [PATCH 10/53] properly return random_range always, to report goal_range metric (episodic mean) --- python/social_bot/tasks.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/python/social_bot/tasks.py b/python/social_bot/tasks.py index 4791333a..9a3fcd8a 100644 --- a/python/social_bot/tasks.py +++ b/python/social_bot/tasks.py @@ -424,13 +424,12 @@ def run(self): reward -= 1. logging.debug("yielding reward: " + str(reward)) done = self._end_episode_after_success - goal_range = self._random_range if done else 0 agent_sentence = yield TeacherAction( reward=reward, sentence="well done", done=done, success=True, - goal_range=goal_range) + goal_range=self._random_range) steps_since_last_reward = 0 if self._switch_goal_within_episode: self.pick_goal() @@ -464,12 +463,11 @@ def run(self): self._push_reward_queue(0) done = self.end_on_hitting_distraction self._prev_dist = dist - goal_range = self._random_range if done else 0 agent_sentence = yield TeacherAction( reward=reward, sentence=self._goal_name, done=done, - goal_range=goal_range) + goal_range=self._random_range) reward = -1.0 logging.debug("yielding reward: {}, took more than {} steps".format( str(reward), str(self._max_steps))) From 596e0d64b0b45292de136b3752a34e3c9aec2d40 Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Fri, 14 Aug 2020 11:40:28 -0700 Subject: [PATCH 11/53] fix goal_range reporting to only report it once at episode end --- python/social_bot/tasks.py | 48 +++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 21 deletions(-) diff --git a/python/social_bot/tasks.py b/python/social_bot/tasks.py index 9a3fcd8a..299aea37 100644 --- a/python/social_bot/tasks.py +++ b/python/social_bot/tasks.py @@ -324,6 +324,7 @@ def __init__(self, self._reward_thresh_to_increase_range) else: self._random_range = random_range + self._goal_dist = 0. obs_format = "image" if use_full_states or use_egocentric_states: obs_format = "full_state" @@ -382,6 +383,19 @@ def _get_agent_loc(self): loc = np.array(loc) return loc, agent_dir + def _prepare_teacher_action(self, reward, sentence, done, success=False): + goal_dist = 0. + if done: + goal_dist = self._goal_dist + # clear self._goal_dist so it is only output once + self._goal_dist = 0. + return TeacherAction( + reward=reward, + sentence=sentence, + done=done, + success=success, + goal_range=goal_dist) + def run(self): """ Start a teaching episode for this task. """ agent_sentence = yield @@ -424,12 +438,11 @@ def run(self): reward -= 1. logging.debug("yielding reward: " + str(reward)) done = self._end_episode_after_success - agent_sentence = yield TeacherAction( + agent_sentence = yield self._prepare_teacher_action( reward=reward, sentence="well done", done=done, - success=True, - goal_range=self._random_range) + success=True) steps_since_last_reward = 0 if self._switch_goal_within_episode: self.pick_goal() @@ -444,11 +457,8 @@ def run(self): logging.debug( "yielding reward: {}, farther than {} from goal".format( str(reward), str(self._fail_distance_thresh))) - yield TeacherAction( - reward=reward, - sentence="failed", - done=True, - goal_range=self._random_range) + yield self._prepare_teacher_action( + reward=reward, sentence="failed", done=True) else: if self._sparse_reward: reward = 0 @@ -463,11 +473,8 @@ def run(self): self._push_reward_queue(0) done = self.end_on_hitting_distraction self._prev_dist = dist - agent_sentence = yield TeacherAction( - reward=reward, - sentence=self._goal_name, - done=done, - goal_range=self._random_range) + agent_sentence = yield self._prepare_teacher_action( + reward=reward, sentence=self._goal_name, done=done) reward = -1.0 logging.debug("yielding reward: {}, took more than {} steps".format( str(reward), str(self._max_steps))) @@ -475,11 +482,8 @@ def run(self): if self.should_use_curriculum_training(): logging.debug("reward queue len: {}, sum: {}".format( str(len(self._q)), str(sum(self._q)))) - yield TeacherAction( - reward=reward, - sentence="failed", - done=True, - goal_range=self._random_range) + yield self._prepare_teacher_action( + reward=reward, sentence="failed", done=True) def _get_distraction_penalty(self, agent_loc, dot, prev_min_dist_to_distraction): @@ -516,12 +520,13 @@ def _move_goal(self, goal, agent_loc, agent_dir): Move goal as well as all distraction objects to a random location. """ avoid_locations = [agent_loc] - loc = self._move_obj( + loc, dist = self._move_obj( obj=goal, agent_loc=agent_loc, agent_dir=agent_dir, is_goal=True, avoid_locations=avoid_locations) + self._goal_dist += dist avoid_locations.append(loc) distractions = OrderedDict() for item in self._distraction_list: @@ -530,7 +535,7 @@ def _move_goal(self, goal, agent_loc, agent_dir): if len(distractions) and self._curriculum_distractions: for item, _ in distractions.items(): distraction = self._world.get_agent(item) - loc = self._move_obj( + loc, _ = self._move_obj( obj=distraction, agent_loc=agent_loc, agent_dir=agent_dir, @@ -553,6 +558,7 @@ def _move_obj(self, range = self._random_range self._is_full_range_in_curriculum = False attempts = 0 + dist = range while True: attempts += 1 dist = random.random() * range @@ -592,7 +598,7 @@ def _move_obj(self, self._prev_dist = self._initial_dist obj.reset() obj.set_pose((loc, (0, 0, 0))) - return loc + return loc, dist def _random_move_objects(self, random_range=10.0): obj_num = len(self._object_list) From 5ef28e97ca7e64564cef85cc1a79e59dfbf065ff Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Mon, 24 Aug 2020 23:10:28 -0700 Subject: [PATCH 12/53] terminate gzclient at env close(), fix bug where direction not ignored when flag says to ignore goal direction --- python/social_bot/envs/gazebo_base.py | 1 + python/social_bot/tasks.py | 9 ++++++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/python/social_bot/envs/gazebo_base.py b/python/social_bot/envs/gazebo_base.py index 4dffc92c..a7c94f59 100644 --- a/python/social_bot/envs/gazebo_base.py +++ b/python/social_bot/envs/gazebo_base.py @@ -134,6 +134,7 @@ def render(self, mode='human'): def close(self): super().close() gazebo.close_without_model_base_fini() + self.__del__() def insert_model(self, model, name=None, pose="0 0 0 0 0 0"): """ diff --git a/python/social_bot/tasks.py b/python/social_bot/tasks.py index 299aea37..398caf87 100644 --- a/python/social_bot/tasks.py +++ b/python/social_bot/tasks.py @@ -396,6 +396,9 @@ def _prepare_teacher_action(self, reward, sentence, done, success=False): success=success, goal_range=goal_dist) + def _within_angle(self, dot): + return (not self._success_with_angle_requirement) or dot > 0.707 + def run(self): """ Start a teaching episode for this task. """ agent_sentence = yield @@ -429,8 +432,8 @@ def run(self): # TODO(Le): compare achieved goal with desired goal if task is # goal conditioned? - if dist < self._success_distance_thresh and ( - not self._success_with_angle_requirement or dot > 0.707): + if dist < self._success_distance_thresh and self._within_angle( + dot): # within 45 degrees of the agent direction reward = 1.0 - distraction_penalty self._push_reward_queue(max(reward, 0)) @@ -504,7 +507,7 @@ def _get_distraction_penalty(self, agent_loc, dot, if (distraction_dist >= self._distraction_penalty_distance_thresh): continue - if obj_name == self._goal_name and dot > 0.707: + if obj_name == self._goal_name and self._within_angle(dot): continue # correctly getting to goal, no penalty if distraction_dist < curr_min_dist: curr_min_dist = distraction_dist From b25a51c1ab6fb36d62df02f8eba6900b575a23eb Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Thu, 27 Aug 2020 15:38:08 -0700 Subject: [PATCH 13/53] address comments --- python/social_bot/envs/gazebo_base.py | 7 ++--- python/social_bot/envs/play_ground.py | 6 ++++- python/social_bot/tasks.py | 37 +++++---------------------- 3 files changed, 14 insertions(+), 36 deletions(-) diff --git a/python/social_bot/envs/gazebo_base.py b/python/social_bot/envs/gazebo_base.py index a7c94f59..e698d0f3 100644 --- a/python/social_bot/envs/gazebo_base.py +++ b/python/social_bot/envs/gazebo_base.py @@ -134,7 +134,8 @@ def render(self, mode='human'): def close(self): super().close() gazebo.close_without_model_base_fini() - self.__del__() + if self._rendering_process is not None: + self._rendering_process.terminate() def insert_model(self, model, name=None, pose="0 0 0 0 0 0"): """ @@ -194,10 +195,6 @@ def seed(self, seed=None): """Gym interface for setting random seed.""" random.seed(seed) - def __del__(self): - if self._rendering_process is not None: - self._rendering_process.terminate() - def _modify_world_xml(xml, modifications): """Modify world xml content diff --git a/python/social_bot/envs/play_ground.py b/python/social_bot/envs/play_ground.py index 0a65a0a5..214c62ac 100644 --- a/python/social_bot/envs/play_ground.py +++ b/python/social_bot/envs/play_ground.py @@ -100,7 +100,9 @@ def __init__(self, become an OrderedDict including ``achieved_goal``, ``desired_goal`` and ``observation`` fields, instead of a flat np array. ``achieved_goal`` will contain agent's current 2-d (x, y) position and ``desired_goal`` will be - the goal object's 2-d position. Reward and task termination remain unchanged. + the goal object's 2-d position. Reward becomes 0 for reaching goal, -1 for + any other step (assuming no distraction penalty). Task termination remains + unchanged. with_language (bool): The observation will be a dict with an extra sentence with_agent_language (bool): Include agent sentence in action space. use_image_observation (bool): Use image, or use low-dimentional states as @@ -199,9 +201,11 @@ def __init__(self, self.observation_space = self._agent.get_observation_space( self._teacher) if self._goal_conditioned: + assert agent_type == 'pioneer2dx_noplugin' assert not with_language assert not use_image_observation assert not image_with_internal_states + assert len(tasks) == 1 and tasks[0] == GoalTask # state observation goal_space = gym.spaces.Box( low=-np.inf, high=np.inf, shape=(2, ), dtype=np.float32) diff --git a/python/social_bot/tasks.py b/python/social_bot/tasks.py index 398caf87..05ab2d6a 100644 --- a/python/social_bot/tasks.py +++ b/python/social_bot/tasks.py @@ -204,8 +204,7 @@ def __init__(self, additional_observation_list=[], use_full_states=False, use_egocentric_states=False, - egocentric_perception_range=0, - order_obj_by_view=False): + egocentric_perception_range=0): """ Args: env (gym.Env): an instance of Environment @@ -219,7 +218,8 @@ def __init__(self, fail_distance_thresh (float): if the agent moves away from the goal more than this distance, it's considered a failure and is given reward -1 distraction_penalty_distance_thresh (float): if positive, penalize agent getting too close - to distraction objects (objects that are not the goal itself) + to distraction objects (objects that include the goal itself, as approaching goal without + facing it is considered hitting a distraction) distraction_penalty (float): positive float of how much to penalize getting too close to distraction objects random_agent_orientation (bool): whether randomize the orientation (yaw) of the agent at the beginning of an @@ -254,8 +254,6 @@ def __init__(self, egocentric_perception_range (float): the max range in degree to limit the agent's observation. E.g. 60 means object is only visible when it's within +/-60 degrees in front of the agent's direction (yaw). - order_obj_by_view (bool): False to order object coordinates (full states) in object id order; - True to order according to y axis value (left to right) together with object id. """ self._max_play_ground_size = 5 # play ground will be (-5, 5) for both x and y axes. # TODO: Remove the default grey walls in the play ground world file, @@ -306,7 +304,6 @@ def __init__(self, self._use_full_states = use_full_states self._use_egocentric_states = use_egocentric_states self._egocentric_perception_range = egocentric_perception_range - self._order_obj_by_view = order_obj_by_view if self.should_use_curriculum_training(): self._orig_random_range = random_range self._random_range = start_range @@ -333,11 +330,10 @@ def __init__(self, obs_relative = "absolute" logging.info("Observations: {}, {}.".format(obs_format, obs_relative)) if use_full_states and not use_egocentric_states: - if not order_obj_by_view: - logging.info( - "Dims: 0-5: agent's velocity and angular " + - "velocity, 6-11: agent's position and pose, 12-13: goal x, y" - + ", all distractions' x, y coordinates.") + logging.info( + "Dims: 0-5: agent's velocity and angular " + + "velocity, 6-11: agent's position and pose, 12-13: goal x, y" + + ", all distractions' x, y coordinates.") self.task_vocab += self._object_list self._env.insert_model_list(self._object_list) @@ -655,7 +651,6 @@ def task_specific_observation(self, agent): else: pose = np.concatenate((pose, obj_pos), axis=0) - obj_array = [] agent_pose = np.array(agent.get_pose()).flatten() if self._use_full_states or self._use_egocentric_states: yaw = agent_pose[5] @@ -666,18 +661,6 @@ def task_specific_observation(self, agent): else: rvx, rvy = vx, vy obs = [rvx, rvy, vz, a1, a2, a3] + list(agent_pose) - if self._egocentric_perception_range > 0: - obj_array.extend( - [[0, rvx, rvy, vz], [1, a1, a2, a3], - [2, agent_pose[0], agent_pose[1], agent_pose[2]], - [3, agent_pose[3], agent_pose[4], agent_pose[5]]]) - i = 4 - else: - obj_array.extend([[0, rvx, rvy], [1, vz, a1], [2, a2, a3], - [3, agent_pose[0], agent_pose[1]], - [4, agent_pose[2], agent_pose[3]], - [5, agent_pose[4], agent_pose[5]]]) - i = 6 # adds objects' (goal's as well as distractions') egocentric # coordinates to observation while len(pose) > 1: @@ -701,10 +684,8 @@ def task_specific_observation(self, agent): rotated_x = 0. rotated_y = 0. magnitude = 0. - obj_array.append([i, rotated_x, rotated_y, magnitude]) obs.extend([rotated_x, rotated_y, magnitude]) else: - obj_array.append([i, rotated_x, rotated_y]) obs.extend([rotated_x, rotated_y]) pose = pose[3:] obs = np.array(obs) @@ -714,10 +695,6 @@ def task_specific_observation(self, agent): obs = np.concatenate((pose, agent_pose, agent_vel, joints_states), axis=0) - if self._order_obj_by_view: - # TODO: skip first few dims corresponding to rvx, a1 etc.. - obj_array.sort(key=operator.itemgetter(2)) - obs = np.array(obj_array).flatten() return obs From 9f3051d581b97c036a40fbc45ab0942ceeaff973 Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Fri, 28 Aug 2020 14:40:01 -0700 Subject: [PATCH 14/53] make "human" rendering consistent with "rgb_array" - video recording --- python/social_bot/envs/gazebo_base.py | 70 +++++++++++++-------------- python/social_bot/envs/play_ground.py | 9 +++- 2 files changed, 43 insertions(+), 36 deletions(-) diff --git a/python/social_bot/envs/gazebo_base.py b/python/social_bot/envs/gazebo_base.py index e698d0f3..595388d1 100644 --- a/python/social_bot/envs/gazebo_base.py +++ b/python/social_bot/envs/gazebo_base.py @@ -82,6 +82,41 @@ def render(self, mode='human'): Args: mode (str): 'human' and 'rgb_array' is supported. """ + if self._rendering_camera is None: + render_camera_sdf = """ + + + + 1 + %s + + + + 0.95 + + 640 + 480 + + + 0.1 + 100 + + + 1 + 30 + true + + + + + """ + render_camera_sdf = render_camera_sdf % self._rendering_cam_pose + self._world.insertModelFromSdfString(render_camera_sdf) + time.sleep(0.2) + self._world.step(20) + self._rendering_camera = self._world.get_agent('render_camera') + image = self._rendering_camera.get_camera_observation( + "default::render_camera::link::camera") if mode == 'human': if self._rendering_process is None: from subprocess import Popen @@ -91,41 +126,6 @@ def render(self, mode='human'): self._rendering_process = Popen(['gzclient']) return if mode == 'rgb_array': - if self._rendering_camera is None: - render_camera_sdf = """ - - - - 1 - %s - - - - 0.95 - - 640 - 480 - - - 0.1 - 100 - - - 1 - 30 - true - - - - - """ - render_camera_sdf = render_camera_sdf % self._rendering_cam_pose - self._world.insertModelFromSdfString(render_camera_sdf) - time.sleep(0.2) - self._world.step(20) - self._rendering_camera = self._world.get_agent('render_camera') - image = self._rendering_camera.get_camera_observation( - "default::render_camera::link::camera") return np.array(image) raise NotImplementedError("rendering mode: " + mode + diff --git a/python/social_bot/envs/play_ground.py b/python/social_bot/envs/play_ground.py index 214c62ac..3f891b84 100644 --- a/python/social_bot/envs/play_ground.py +++ b/python/social_bot/envs/play_ground.py @@ -218,8 +218,15 @@ def _get_observation(self, teacher_sentence): obs = self._agent.get_observation(self._teacher, teacher_sentence) if self._goal_conditioned: flat_obs = obs - obs = OrderedDict(observation=obs) + obs = OrderedDict() + obs['observation'] = flat_obs # flat_obs[15:] obs['achieved_goal'] = flat_obs[6:8] + # We can use this field to relabel future state in goal conditioned + # policy on top of the achieved_goal field. + # High level needs to fill this in for the desired_goal. + # When to use real reward, when to use fake reward? + # obs['aux_achieved'] = np.concatenate( + # (flat_obs[:6], flat_obs[8:12]), axis=0) obs['desired_goal'] = flat_obs[12:14] return obs From 8281a65592deb7b2bd6c20cdd24c8fbeeb6e1c86 Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Thu, 3 Sep 2020 23:36:54 -0700 Subject: [PATCH 15/53] use aux achieved field in goal conditioned GoalTask. --- pygazebo/pygazebo.cc | 15 ++++++++- python/social_bot/envs/gazebo_base.py | 8 +++-- python/social_bot/envs/play_ground.py | 44 ++++++++++++++++++++------- 3 files changed, 52 insertions(+), 15 deletions(-) diff --git a/pygazebo/pygazebo.cc b/pygazebo/pygazebo.cc index aaafa439..c9ac1548 100644 --- a/pygazebo/pygazebo.cc +++ b/pygazebo/pygazebo.cc @@ -507,6 +507,19 @@ void CloseWithoutModelbaseFini() { gazebo_sensor_initialized = false; } +void CloseWithoutFini() { + // this causes segmentation fault when run. + gazebo::physics::stop_worlds(); + gazebo::sensors::stop(); + gazebo::util::LogRecord::Instance()->Stop(); + gazebo::transport::stop(); + + gazebo::transport::fini(); + + gazebo_initialized = false; + gazebo_sensor_initialized = false; +} + void Close() { gazebo::shutdown(); gazebo_initialized = false; @@ -573,7 +586,7 @@ PYBIND11_MODULE(pygazebo, m) { m.def("close_without_model_base_fini", &CloseWithoutModelbaseFini, - "A customized close function without execute ModelbaseFini" + "A customized close function without execute ModelbaseFini. " "For some unknwon reason, ModelbaseFini() in the gazebo.shutdown() " "makes the" "process fail to exit when the environment is wrapped with process"); diff --git a/python/social_bot/envs/gazebo_base.py b/python/social_bot/envs/gazebo_base.py index 595388d1..d55df8ac 100644 --- a/python/social_bot/envs/gazebo_base.py +++ b/python/social_bot/envs/gazebo_base.py @@ -58,7 +58,9 @@ def __init__(self, if port is None: port = 0 self._port = port - # to avoid different parallel simulation has the same randomness + # This avoids different parallel simulations having the same randomness. + # When calling from alf, alf.environments.utils.create_environment calls + # env.seed() afterwards, which is the real seed being used. Not this one. random.seed(port) self._rendering_process = None self._rendering_camera = None @@ -115,8 +117,6 @@ def render(self, mode='human'): time.sleep(0.2) self._world.step(20) self._rendering_camera = self._world.get_agent('render_camera') - image = self._rendering_camera.get_camera_observation( - "default::render_camera::link::camera") if mode == 'human': if self._rendering_process is None: from subprocess import Popen @@ -126,6 +126,8 @@ def render(self, mode='human'): self._rendering_process = Popen(['gzclient']) return if mode == 'rgb_array': + image = self._rendering_camera.get_camera_observation( + "default::render_camera::link::camera") return np.array(image) raise NotImplementedError("rendering mode: " + mode + diff --git a/python/social_bot/envs/play_ground.py b/python/social_bot/envs/play_ground.py index 3f891b84..e1c77253 100644 --- a/python/social_bot/envs/play_ground.py +++ b/python/social_bot/envs/play_ground.py @@ -73,6 +73,7 @@ def __init__(self, world_name="play_ground.world", tasks=[GoalTask], goal_conditioned=False, + use_aux_achieved=False, with_language=False, with_agent_language=False, use_image_observation=False, @@ -103,6 +104,8 @@ def __init__(self, the goal object's 2-d position. Reward becomes 0 for reaching goal, -1 for any other step (assuming no distraction penalty). Task termination remains unchanged. + use_aux_achieved (bool): if True, pull out speed, pose dimensions into a separate + field: aux_achieved. Only valid when goal_conditioned is True. with_language (bool): The observation will be a dict with an extra sentence with_agent_language (bool): Include agent sentence in action space. use_image_observation (bool): Use image, or use low-dimentional states as @@ -138,6 +141,7 @@ def __init__(self, self._action_cost = action_cost self._goal_conditioned = goal_conditioned + self._use_aux_achieved = use_aux_achieved self._with_language = with_language self._seq_length = vocab_sequence_length self._with_agent_language = with_language and with_agent_language @@ -207,27 +211,45 @@ def __init__(self, assert not image_with_internal_states assert len(tasks) == 1 and tasks[0] == GoalTask # state observation + goal_shape = 2 goal_space = gym.spaces.Box( - low=-np.inf, high=np.inf, shape=(2, ), dtype=np.float32) - self.observation_space = gym.spaces.Dict( + low=-np.inf, + high=np.inf, + shape=(goal_shape, ), + dtype=np.float32) + d = OrderedDict( observation=self.observation_space, - desired_goal=goal_space, - achieved_goal=goal_space) + achieved_goal=goal_space, + desired_goal=goal_space) + if use_aux_achieved: + ob_shape = self.observation_space.shape[0] + aux_shape = 10 + ob_space = gym.spaces.Box( + low=-np.inf, + high=np.inf, + shape=(ob_shape - aux_shape - goal_shape * 2, ), + dtype=np.float32) + aux_space = gym.spaces.Box( + low=-np.inf, + high=np.inf, + shape=(aux_shape, ), + dtype=np.float32) + d["observation"] = ob_space + d["aux_achieved"] = aux_space + self.observation_space = gym.spaces.Dict(**d) def _get_observation(self, teacher_sentence): obs = self._agent.get_observation(self._teacher, teacher_sentence) if self._goal_conditioned: flat_obs = obs obs = OrderedDict() - obs['observation'] = flat_obs # flat_obs[15:] + obs['observation'] = flat_obs obs['achieved_goal'] = flat_obs[6:8] - # We can use this field to relabel future state in goal conditioned - # policy on top of the achieved_goal field. - # High level needs to fill this in for the desired_goal. - # When to use real reward, when to use fake reward? - # obs['aux_achieved'] = np.concatenate( - # (flat_obs[:6], flat_obs[8:12]), axis=0) obs['desired_goal'] = flat_obs[12:14] + if self._use_aux_achieved: + obs['observation'] = flat_obs[14:] + obs['aux_achieved'] = np.concatenate( + (flat_obs[:6], flat_obs[8:12]), axis=0) return obs def reset(self): From 9bd0d528ae0f08e63c56be1de7a48e4e3a90aed6 Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Fri, 4 Sep 2020 15:44:45 -0700 Subject: [PATCH 16/53] add the option to only plan x-y related dimensions in aux_achieved --- python/social_bot/envs/play_ground.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/python/social_bot/envs/play_ground.py b/python/social_bot/envs/play_ground.py index e1c77253..6abd8cff 100644 --- a/python/social_bot/envs/play_ground.py +++ b/python/social_bot/envs/play_ground.py @@ -74,6 +74,7 @@ def __init__(self, tasks=[GoalTask], goal_conditioned=False, use_aux_achieved=False, + xy_only_aux=False, with_language=False, with_agent_language=False, use_image_observation=False, @@ -106,6 +107,8 @@ def __init__(self, unchanged. use_aux_achieved (bool): if True, pull out speed, pose dimensions into a separate field: aux_achieved. Only valid when goal_conditioned is True. + xy_only_aux (bool): exclude irrelevant dimensions (z-axis movements) from + aux_achieved field. with_language (bool): The observation will be a dict with an extra sentence with_agent_language (bool): Include agent sentence in action space. use_image_observation (bool): Use image, or use low-dimentional states as @@ -142,6 +145,7 @@ def __init__(self, self._action_cost = action_cost self._goal_conditioned = goal_conditioned self._use_aux_achieved = use_aux_achieved + self._xy_only_aux = xy_only_aux self._with_language = with_language self._seq_length = vocab_sequence_length self._with_agent_language = with_language and with_agent_language @@ -224,6 +228,8 @@ def __init__(self, if use_aux_achieved: ob_shape = self.observation_space.shape[0] aux_shape = 10 + if self._xy_only_aux: + aux_shape = 4 ob_space = gym.spaces.Box( low=-np.inf, high=np.inf, @@ -250,6 +256,14 @@ def _get_observation(self, teacher_sentence): obs['observation'] = flat_obs[14:] obs['aux_achieved'] = np.concatenate( (flat_obs[:6], flat_obs[8:12]), axis=0) + if self._xy_only_aux: + # 2: z-speed, 3, 4: angular velocity, 5: yaw-vel, 6, 7: x, y, 8: z, 9, 10, 11: pose + obs['observation'] = np.concatenate( + (flat_obs[2:5], flat_obs[8:11], flat_obs[14:]), axis=0) + obs['aux_achieved'] = np.concatenate( + (flat_obs[0:2], np.expand_dims(flat_obs[5], 0), + np.expand_dims(flat_obs[11], 0)), + axis=0) return obs def reset(self): From a94601fb80820aaf5e4831e9e79fea3b6c0bb2f1 Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Tue, 8 Sep 2020 23:18:45 -0700 Subject: [PATCH 17/53] multi dimensional reward for goal conditioned goal task --- python/social_bot/envs/play_ground.py | 3 + python/social_bot/tasks.py | 102 ++++++++++++++++++++------ python/social_bot/teacher.py | 14 +++- 3 files changed, 96 insertions(+), 23 deletions(-) diff --git a/python/social_bot/envs/play_ground.py b/python/social_bot/envs/play_ground.py index 6abd8cff..dab8441e 100644 --- a/python/social_bot/envs/play_ground.py +++ b/python/social_bot/envs/play_ground.py @@ -318,6 +318,9 @@ def step(self, action): info = {"is_success": teacher_action.success} if self._has_goal_task: info["goal_range"] = teacher_action.goal_range + # Maybe use multi dimensional reward + if teacher_action.rewards is not None: + reward = teacher_action.rewards return obs, reward, teacher_action.done, info def get_step_time(self): diff --git a/python/social_bot/tasks.py b/python/social_bot/tasks.py index 05ab2d6a..0f0242cd 100644 --- a/python/social_bot/tasks.py +++ b/python/social_bot/tasks.py @@ -178,8 +178,10 @@ def __init__(self, 'coke_can', 'table', 'car_wheel', 'plastic_cup', 'beer' ], goal_conditioned=False, + multi_dim_reward=False, end_on_hitting_distraction=False, end_episode_after_success=False, + reset_time_limit_on_success=True, success_distance_thresh=0.5, fail_distance_thresh=2.0, distraction_penalty_distance_thresh=0, @@ -211,9 +213,13 @@ def __init__(self, max_steps (int): episode will end if not reaching gaol in so many steps goal_name (string): name of the goal in the world distraction_list (list of string): a list of model. the model shoud be in gazebo database + goal_conditioned (bool): if True, each step has -1 reward, unless at goal state, which gives 0. + multi_dim_reward (bool): if True, separate goal reward and distraction penalty into two dimensions. end_episode_after_success (bool): if True, the episode will end once the goal is reached. A True value of this flag will overwrite the effects of flags ``switch_goal_within_episode`` and ``move_goal_during_episode``. end_on_hitting_distraction (bool): whether to end episode on hitting distraction + reset_time_limit_on_success (bool): if not ending after success, if hit success before time limit, + reset clock to 0. success_distance_thresh (float): the goal is reached if it's within this distance to the agent fail_distance_thresh (float): if the agent moves away from the goal more than this distance, it's considered a failure and is given reward -1 @@ -263,7 +269,10 @@ def __init__(self, env=env, max_steps=max_steps, reward_weight=reward_weight) self._goal_name = goal_name self._goal_conditioned = goal_conditioned + self._multi_dim_reward = multi_dim_reward self.end_on_hitting_distraction = end_on_hitting_distraction + self._end_episode_after_success = end_episode_after_success + self._reset_time_limit_on_success = reset_time_limit_on_success self._success_distance_thresh = success_distance_thresh self._fail_distance_thresh = fail_distance_thresh self._distraction_penalty_distance_thresh = distraction_penalty_distance_thresh @@ -289,7 +298,6 @@ def __init__(self, self._object_list.append(goal_name) self._goals = self._object_list self._move_goal_during_episode = move_goal_during_episode - self._end_episode_after_success = end_episode_after_success self._success_with_angle_requirement = success_with_angle_requirement if not additional_observation_list: additional_observation_list = self._object_list @@ -379,8 +387,15 @@ def _get_agent_loc(self): loc = np.array(loc) return loc, agent_dir - def _prepare_teacher_action(self, reward, sentence, done, success=False): + def _prepare_teacher_action(self, + reward, + sentence, + done, + success=False, + rewards=None): goal_dist = 0. + if rewards is not None: + rewards = rewards.astype(np.float32) if done: goal_dist = self._goal_dist # clear self._goal_dist so it is only output once @@ -390,11 +405,23 @@ def _prepare_teacher_action(self, reward, sentence, done, success=False): sentence=sentence, done=done, success=success, - goal_range=goal_dist) + goal_range=goal_dist, + rewards=rewards) def _within_angle(self, dot): return (not self._success_with_angle_requirement) or dot > 0.707 + def _get_goal_dist(self, goal): + loc, agent_dir = self._get_agent_loc() + goal_loc, _ = goal.get_pose() + goal_loc = np.array(goal_loc) + dist = np.linalg.norm(loc - goal_loc) + # dir from get_pose is (roll, pitch, yaw) + dir = np.array([math.cos(agent_dir[2]), math.sin(agent_dir[2])]) + goal_dir = (goal_loc[0:2] - loc[0:2]) / dist + dot = sum(dir * goal_dir) + return dist, dot + def run(self): """ Start a teaching episode for this task. """ agent_sentence = yield @@ -411,21 +438,13 @@ def run(self): self._move_goal(goal, loc, agent_dir) steps_since_last_reward = 0 prev_min_dist_to_distraction = 100 + rewards = None # reward array in multi_dim_reward case while steps_since_last_reward < self._max_steps: steps_since_last_reward += 1 - loc, agent_dir = self._get_agent_loc() - goal_loc, _ = goal.get_pose() - goal_loc = np.array(goal_loc) - dist = np.linalg.norm(loc - goal_loc) - # dir from get_pose is (roll, pitch, yaw) - dir = np.array([math.cos(agent_dir[2]), math.sin(agent_dir[2])]) - goal_dir = (goal_loc[0:2] - loc[0:2]) / dist - dot = sum(dir * goal_dir) - + dist, dot = self._get_goal_dist(goal) distraction_penalty, prev_min_dist_to_distraction = ( self._get_distraction_penalty(loc, dot, prev_min_dist_to_distraction)) - # TODO(Le): compare achieved goal with desired goal if task is # goal conditioned? if dist < self._success_distance_thresh and self._within_angle( @@ -435,14 +454,21 @@ def run(self): self._push_reward_queue(max(reward, 0)) if self._goal_conditioned: reward -= 1. + if self._multi_dim_reward: + rewards = np.array([0, -distraction_penalty]) + else: + if self._multi_dim_reward: + rewards = np.array([1, -distraction_penalty]) logging.debug("yielding reward: " + str(reward)) done = self._end_episode_after_success agent_sentence = yield self._prepare_teacher_action( reward=reward, sentence="well done", done=done, - success=True) - steps_since_last_reward = 0 + success=True, + rewards=rewards) + if self._reset_time_limit_on_success: + steps_since_last_reward = 0 if self._switch_goal_within_episode: self.pick_goal() goal = self._world.get_agent(self._goal_name) @@ -456,8 +482,13 @@ def run(self): logging.debug( "yielding reward: {}, farther than {} from goal".format( str(reward), str(self._fail_distance_thresh))) + if self._multi_dim_reward: + rewards = np.array([-1, -distraction_penalty]) yield self._prepare_teacher_action( - reward=reward, sentence="failed", done=True) + reward=reward, + sentence="failed", + done=True, + rewards=rewards) else: if self._sparse_reward: reward = 0 @@ -465,6 +496,8 @@ def run(self): reward = -1 else: reward = (self._prev_dist - dist) / self._initial_dist + if self._multi_dim_reward: + rewards = np.array([reward, -distraction_penalty]) reward = reward - distraction_penalty done = False if distraction_penalty > 0: @@ -473,16 +506,43 @@ def run(self): done = self.end_on_hitting_distraction self._prev_dist = dist agent_sentence = yield self._prepare_teacher_action( - reward=reward, sentence=self._goal_name, done=done) + reward=reward, + sentence=self._goal_name, + done=done, + rewards=rewards) reward = -1.0 - logging.debug("yielding reward: {}, took more than {} steps".format( - str(reward), str(self._max_steps))) - self._push_reward_queue(0) + dist, dot = self._get_goal_dist(goal) + distraction_penalty, prev_min_dist_to_distraction = ( + self._get_distraction_penalty(loc, dot, + prev_min_dist_to_distraction)) + # TODO(Le): compare achieved goal with desired goal if task is + # goal conditioned? + success = False + if dist < self._success_distance_thresh and self._within_angle(dot): + success = True + reward = 1.0 - distraction_penalty + self._push_reward_queue(max(reward, 0)) + if self._goal_conditioned: + reward -= 1. + if self._multi_dim_reward: + rewards = np.array([0, -distraction_penalty]) + else: + if self._multi_dim_reward: + rewards = np.array([1, -distraction_penalty]) + else: + self._push_reward_queue(0) + logging.debug("took more than {} steps".format( + str(self._max_steps))) + logging.debug("yielding reward: {}".format(str(reward))) if self.should_use_curriculum_training(): logging.debug("reward queue len: {}, sum: {}".format( str(len(self._q)), str(sum(self._q)))) yield self._prepare_teacher_action( - reward=reward, sentence="failed", done=True) + reward=reward, + sentence="failed", + done=True, + success=success, + rewards=rewards) def _get_distraction_penalty(self, agent_loc, dot, prev_min_dist_to_distraction): diff --git a/python/social_bot/teacher.py b/python/social_bot/teacher.py index 7c84dd9b..cb28ab13 100644 --- a/python/social_bot/teacher.py +++ b/python/social_bot/teacher.py @@ -42,11 +42,13 @@ def __init__(self, done=False, is_idle=False, success=False, - goal_range=0.0): + goal_range=0.0, + rewards=None): """ Args: done: end of an episode if true success: if the episode is successful or not + rewards: multi dim reward array """ self.reward = reward self.sentence = sentence @@ -54,6 +56,7 @@ def __init__(self, self.is_idle = is_idle self.success = success self.goal_range = float(goal_range) + self.rewards = rewards class TaskGroup(object): @@ -343,6 +346,7 @@ def teach(self, agent_sentence): active_group_id = -1 success = False goal_range = 0 + rewards = None # run all groups in parallel for i, g in enumerate(self._task_groups): teacher_action = g.teach(agent_sentence) @@ -353,6 +357,11 @@ def teach(self, agent_sentence): if teacher_action.goal_range > 0: goal_range = teacher_action.goal_range weight = g.get_current_reward_weight() + if teacher_action.rewards is not None: + if rewards is None: + rewards = weight * teacher_action.rewards + else: + rewards += weight * teacher_action.rewards final_reward += weight * teacher_action.reward if not final_sentence: final_sentence = teacher_action.sentence @@ -365,5 +374,6 @@ def teach(self, agent_sentence): final_sentence, done, success=success, - goal_range=goal_range) + goal_range=goal_range, + rewards=rewards) return return_action From dca08074335253695ff83b19e56dd9edb8cf7179 Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Tue, 8 Sep 2020 23:19:08 -0700 Subject: [PATCH 18/53] adding a smaller ball to indicate target for pioneer, so it doesn't collide with the ball, and learn to stay close to the ball. --- .../models/target_ball/model.config | 10 +++++ .../social_bot/models/target_ball/model.sdf | 43 +++++++++++++++++++ 2 files changed, 53 insertions(+) create mode 100644 python/social_bot/models/target_ball/model.config create mode 100644 python/social_bot/models/target_ball/model.sdf diff --git a/python/social_bot/models/target_ball/model.config b/python/social_bot/models/target_ball/model.config new file mode 100644 index 00000000..9aaa1ce2 --- /dev/null +++ b/python/social_bot/models/target_ball/model.config @@ -0,0 +1,10 @@ + + + Target Ball + 1.0 + model.sdf + + + A ghost red ball. + + diff --git a/python/social_bot/models/target_ball/model.sdf b/python/social_bot/models/target_ball/model.sdf new file mode 100644 index 00000000..7e5a742a --- /dev/null +++ b/python/social_bot/models/target_ball/model.sdf @@ -0,0 +1,43 @@ + + + + true + 0 0 0 0 0 0 + + 0 0 0.527 0 0 0 + + + + 0.03 + + + + 0.6 0.1 0.1 1 + 0.7 0.1 0.1 1 + 0 0 0 0 + 0.0 0 0 1 + + + + + + + 0 + + + + + 0.0 + 0 + + + + 0 + 0.1 + + + + + + + From 411a0961f76785b6bb285295eff1da0fd7657202 Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Thu, 10 Sep 2020 10:33:38 -0700 Subject: [PATCH 19/53] fix missing distraction penalty --- python/social_bot/envs/gazebo_base.py | 1 + python/social_bot/tasks.py | 11 +++++------ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/python/social_bot/envs/gazebo_base.py b/python/social_bot/envs/gazebo_base.py index d55df8ac..4642a30d 100644 --- a/python/social_bot/envs/gazebo_base.py +++ b/python/social_bot/envs/gazebo_base.py @@ -138,6 +138,7 @@ def close(self): gazebo.close_without_model_base_fini() if self._rendering_process is not None: self._rendering_process.terminate() + self._rendering_process = None def insert_model(self, model, name=None, pose="0 0 0 0 0 0"): """ diff --git a/python/social_bot/tasks.py b/python/social_bot/tasks.py index 0f0242cd..cf103782 100644 --- a/python/social_bot/tasks.py +++ b/python/social_bot/tasks.py @@ -420,7 +420,7 @@ def _get_goal_dist(self, goal): dir = np.array([math.cos(agent_dir[2]), math.sin(agent_dir[2])]) goal_dir = (goal_loc[0:2] - loc[0:2]) / dist dot = sum(dir * goal_dir) - return dist, dot + return dist, dot, loc def run(self): """ Start a teaching episode for this task. """ @@ -430,18 +430,17 @@ def run(self): loc, agent_dir = self._agent.get_pose() self._agent.set_pose((loc, (agent_dir[0], agent_dir[1], 2 * math.pi * random.random()))) - loc, agent_dir = self._agent.get_pose() - loc = np.array(loc) + a_loc, a_dir = self._get_agent_loc() self._random_move_objects() self.pick_goal() goal = self._world.get_model(self._goal_name) - self._move_goal(goal, loc, agent_dir) + self._move_goal(goal, a_loc, a_dir) steps_since_last_reward = 0 prev_min_dist_to_distraction = 100 rewards = None # reward array in multi_dim_reward case while steps_since_last_reward < self._max_steps: steps_since_last_reward += 1 - dist, dot = self._get_goal_dist(goal) + dist, dot, loc = self._get_goal_dist(goal) distraction_penalty, prev_min_dist_to_distraction = ( self._get_distraction_penalty(loc, dot, prev_min_dist_to_distraction)) @@ -511,7 +510,7 @@ def run(self): done=done, rewards=rewards) reward = -1.0 - dist, dot = self._get_goal_dist(goal) + dist, dot, loc = self._get_goal_dist(goal) distraction_penalty, prev_min_dist_to_distraction = ( self._get_distraction_penalty(loc, dot, prev_min_dist_to_distraction)) From 849b5eaee6093085036809e5b92b8560c9538761 Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Thu, 10 Sep 2020 11:47:20 -0700 Subject: [PATCH 20/53] address comments from JT --- python/social_bot/envs/play_ground.py | 74 +---------------------- python/social_bot/gazebo_agent.py | 86 ++++++++++++++++++++++++++- 2 files changed, 86 insertions(+), 74 deletions(-) diff --git a/python/social_bot/envs/play_ground.py b/python/social_bot/envs/play_ground.py index dab8441e..68ef5f0b 100644 --- a/python/social_bot/envs/play_ground.py +++ b/python/social_bot/envs/play_ground.py @@ -72,9 +72,6 @@ def __init__(self, agent_type='pioneer2dx_noplugin', world_name="play_ground.world", tasks=[GoalTask], - goal_conditioned=False, - use_aux_achieved=False, - xy_only_aux=False, with_language=False, with_agent_language=False, use_image_observation=False, @@ -97,18 +94,6 @@ def __init__(self, world_name (string): Select the world file, e.g., empty.world, play_ground.world, grocery_ground.world tasks (list): a list of teacher.Task, e.g., GoalTask, KickingBallTask - goal_conditioned (bool): Turn on goal conditioned tasks. Currently only GoalTask - with full state observation has this mode enabled. The observation will - become an OrderedDict including ``achieved_goal``, ``desired_goal`` and - ``observation`` fields, instead of a flat np array. ``achieved_goal`` will - contain agent's current 2-d (x, y) position and ``desired_goal`` will be - the goal object's 2-d position. Reward becomes 0 for reaching goal, -1 for - any other step (assuming no distraction penalty). Task termination remains - unchanged. - use_aux_achieved (bool): if True, pull out speed, pose dimensions into a separate - field: aux_achieved. Only valid when goal_conditioned is True. - xy_only_aux (bool): exclude irrelevant dimensions (z-axis movements) from - aux_achieved field. with_language (bool): The observation will be a dict with an extra sentence with_agent_language (bool): Include agent sentence in action space. use_image_observation (bool): Use image, or use low-dimentional states as @@ -143,9 +128,6 @@ def __init__(self, """ self._action_cost = action_cost - self._goal_conditioned = goal_conditioned - self._use_aux_achieved = use_aux_achieved - self._xy_only_aux = xy_only_aux self._with_language = with_language self._seq_length = vocab_sequence_length self._with_agent_language = with_language and with_agent_language @@ -208,63 +190,9 @@ def __init__(self, self.reset() self.observation_space = self._agent.get_observation_space( self._teacher) - if self._goal_conditioned: - assert agent_type == 'pioneer2dx_noplugin' - assert not with_language - assert not use_image_observation - assert not image_with_internal_states - assert len(tasks) == 1 and tasks[0] == GoalTask - # state observation - goal_shape = 2 - goal_space = gym.spaces.Box( - low=-np.inf, - high=np.inf, - shape=(goal_shape, ), - dtype=np.float32) - d = OrderedDict( - observation=self.observation_space, - achieved_goal=goal_space, - desired_goal=goal_space) - if use_aux_achieved: - ob_shape = self.observation_space.shape[0] - aux_shape = 10 - if self._xy_only_aux: - aux_shape = 4 - ob_space = gym.spaces.Box( - low=-np.inf, - high=np.inf, - shape=(ob_shape - aux_shape - goal_shape * 2, ), - dtype=np.float32) - aux_space = gym.spaces.Box( - low=-np.inf, - high=np.inf, - shape=(aux_shape, ), - dtype=np.float32) - d["observation"] = ob_space - d["aux_achieved"] = aux_space - self.observation_space = gym.spaces.Dict(**d) def _get_observation(self, teacher_sentence): - obs = self._agent.get_observation(self._teacher, teacher_sentence) - if self._goal_conditioned: - flat_obs = obs - obs = OrderedDict() - obs['observation'] = flat_obs - obs['achieved_goal'] = flat_obs[6:8] - obs['desired_goal'] = flat_obs[12:14] - if self._use_aux_achieved: - obs['observation'] = flat_obs[14:] - obs['aux_achieved'] = np.concatenate( - (flat_obs[:6], flat_obs[8:12]), axis=0) - if self._xy_only_aux: - # 2: z-speed, 3, 4: angular velocity, 5: yaw-vel, 6, 7: x, y, 8: z, 9, 10, 11: pose - obs['observation'] = np.concatenate( - (flat_obs[2:5], flat_obs[8:11], flat_obs[14:]), axis=0) - obs['aux_achieved'] = np.concatenate( - (flat_obs[0:2], np.expand_dims(flat_obs[5], 0), - np.expand_dims(flat_obs[11], 0)), - axis=0) - return obs + return self._agent.get_observation(self._teacher, teacher_sentence) def reset(self): """ diff --git a/python/social_bot/gazebo_agent.py b/python/social_bot/gazebo_agent.py index 774e2932..7b7051a2 100644 --- a/python/social_bot/gazebo_agent.py +++ b/python/social_bot/gazebo_agent.py @@ -25,6 +25,7 @@ from absl import logging import social_bot import social_bot.pygazebo as gazebo +from social_bot.tasks import GoalTask @gin.configurable @@ -43,6 +44,9 @@ def __init__(self, with_language=False, with_agent_language=False, vocab_sequence_length=20, + goal_conditioned=False, + use_aux_achieved=False, + xy_only_aux=False, action_wrapper=None): """ Args: @@ -65,6 +69,19 @@ def __init__(self, with_language (bool): The observation will be a dict with an extra sentence with_agent_language (bool): Include language in agent's action space vocab_sequence_length (int): the length of encoded sequence if with_language + goal_conditioned (bool): Turn on goal conditioned tasks. Currently only GoalTask + with full state observation has this mode enabled. It makes the flat observation + array an OrderedDict including ``achieved_goal``, ``desired_goal`` and + ``observation`` fields. + ``achieved_goal`` will contain agent's current 2-d (x, y) position and + ``desired_goal`` will be the goal object's 2-d position. + Additionally, the flag with the same name in GoalTask makes reward 0 for + reaching goal and -1 for any other step (assuming no distraction penalty). + Task termination remains unchanged. + use_aux_achieved (bool): if True, pull out speed, pose dimensions into a separate + field: aux_achieved. Only valid when goal_conditioned is True. + xy_only_aux (bool): exclude irrelevant dimensions (z-axis movements) from + aux_achieved field. action_wrapper (None|class): Some times primitive joints is not wanted, e.g., has redundant dimensions or offset. If not None, this is used to transform the agent actions. See ActionWrapper of gazebo_agent.py for example. @@ -78,6 +95,9 @@ def __init__(self, self._with_agent_language = with_agent_language self._vocab_sequence_length = vocab_sequence_length self._sentence_space = None + self._goal_conditioned = goal_conditioned + self._use_aux_achieved = use_aux_achieved + self._xy_only_aux = xy_only_aux if config == None: # Load agent configurations @@ -170,6 +190,25 @@ def get_observation(self, teacher, sentence_raw="hello"): obs = self.get_camera_observation() else: # observation is pure low-dimentional states obs = teacher.get_task_specific_observation(self) + if self._goal_conditioned: + flat_obs = obs + obs = OrderedDict() + obs['observation'] = flat_obs + obs['achieved_goal'] = flat_obs[6:8] + obs['desired_goal'] = flat_obs[12:14] + if self._use_aux_achieved: + obs['observation'] = flat_obs[14:] + obs['aux_achieved'] = np.concatenate( + (flat_obs[:6], flat_obs[8:12]), axis=0) + if self._xy_only_aux: + # 2: z-speed, 3, 4: angular velocity, 5: yaw-vel, 6, 7: x, y, 8: z, 9, 10, 11: pose + obs['observation'] = np.concatenate( + (flat_obs[2:5], flat_obs[8:11], flat_obs[14:]), + axis=0) + obs['aux_achieved'] = np.concatenate( + (flat_obs[0:2], np.expand_dims(flat_obs[5], 0), + np.expand_dims(flat_obs[11], 0)), + axis=0) return obs def get_camera_observation(self): @@ -233,7 +272,10 @@ def get_observation_space(self, teacher): observations from teacher's taskgroups as a sample. """ obs_sample = self.get_observation(teacher) - if self._with_language or self._image_with_internal_states: + if self._goal_conditioned: + observation_space = self._construct_goal_condi_space( + obs_sample, teacher) + elif self._with_language or self._image_with_internal_states: # observation is a dictionary observation_space = self._construct_dict_space(obs_sample) elif self._use_image_observation: @@ -301,6 +343,48 @@ def _construct_dict_space(self, obs_sample): ob_space = gym.spaces.Dict(ob_space_dict) return ob_space + def _construct_goal_condi_space(self, obs_sample, teacher): + assert self.type == 'pioneer2dx_noplugin' + assert not self._with_language + assert not self._use_image_observation + assert not self._image_with_internal_states + tgs = teacher.get_task_groups() + assert len(tgs) == 1 + tasks = tgs[0].get_tasks() + assert len(tasks) == 1 and isinstance(tasks[0], GoalTask), str(tasks) + # state observation + goal_shape = 2 + goal_space = gym.spaces.Box( + low=-np.inf, high=np.inf, shape=(goal_shape, ), dtype=np.float32) + observation_space = gym.spaces.Box( + low=-np.inf, + high=np.inf, + shape=obs_sample['observation'].shape, + dtype=np.float32) + d = OrderedDict( + observation=observation_space, + achieved_goal=goal_space, + desired_goal=goal_space) + if self._use_aux_achieved: + ob_shape = observation_space.shape[0] + aux_shape = 10 + if self._xy_only_aux: + aux_shape = 4 + ob_space = gym.spaces.Box( + low=-np.inf, + high=np.inf, + shape=(ob_shape - aux_shape - goal_shape * 2, ), + dtype=np.float32) + aux_space = gym.spaces.Box( + low=-np.inf, + high=np.inf, + shape=(aux_shape, ), + dtype=np.float32) + d["observation"] = ob_space + d["aux_achieved"] = aux_space + observation_space = gym.spaces.Dict(**d) + return observation_space + def setup_joints(self, agent, joints, agent_cfg): """ Setup the joints acrroding to agent configuration. From 9d0dd00f401099e9f4dee907f2264d77f47273d4 Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Thu, 10 Sep 2020 11:52:41 -0700 Subject: [PATCH 21/53] missed one fix --- python/social_bot/tasks.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/social_bot/tasks.py b/python/social_bot/tasks.py index cf103782..473d6fc4 100644 --- a/python/social_bot/tasks.py +++ b/python/social_bot/tasks.py @@ -732,7 +732,8 @@ def task_specific_observation(self, agent): x, y, yaw) else: rotated_x, rotated_y = x, y - if self._egocentric_perception_range > 0: + if (self._use_egocentric_states + and self._egocentric_perception_range > 0): dist = math.sqrt(rotated_x * rotated_x + rotated_y * rotated_y) rotated_x /= dist From 92db4a099b8dadccc12e7159c4d0ebba8b4625a6 Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Tue, 15 Sep 2020 15:31:49 -0700 Subject: [PATCH 22/53] address comments, move goal_conditioned obs generation to GoalTask --- pygazebo/pygazebo.cc | 13 ------ python/social_bot/gazebo_agent.py | 73 +++++++------------------------ python/social_bot/tasks.py | 28 ++++++++++++ 3 files changed, 44 insertions(+), 70 deletions(-) diff --git a/pygazebo/pygazebo.cc b/pygazebo/pygazebo.cc index c9ac1548..e8712282 100644 --- a/pygazebo/pygazebo.cc +++ b/pygazebo/pygazebo.cc @@ -507,19 +507,6 @@ void CloseWithoutModelbaseFini() { gazebo_sensor_initialized = false; } -void CloseWithoutFini() { - // this causes segmentation fault when run. - gazebo::physics::stop_worlds(); - gazebo::sensors::stop(); - gazebo::util::LogRecord::Instance()->Stop(); - gazebo::transport::stop(); - - gazebo::transport::fini(); - - gazebo_initialized = false; - gazebo_sensor_initialized = false; -} - void Close() { gazebo::shutdown(); gazebo_initialized = false; diff --git a/python/social_bot/gazebo_agent.py b/python/social_bot/gazebo_agent.py index 7b7051a2..062da801 100644 --- a/python/social_bot/gazebo_agent.py +++ b/python/social_bot/gazebo_agent.py @@ -45,8 +45,6 @@ def __init__(self, with_agent_language=False, vocab_sequence_length=20, goal_conditioned=False, - use_aux_achieved=False, - xy_only_aux=False, action_wrapper=None): """ Args: @@ -78,10 +76,6 @@ def __init__(self, Additionally, the flag with the same name in GoalTask makes reward 0 for reaching goal and -1 for any other step (assuming no distraction penalty). Task termination remains unchanged. - use_aux_achieved (bool): if True, pull out speed, pose dimensions into a separate - field: aux_achieved. Only valid when goal_conditioned is True. - xy_only_aux (bool): exclude irrelevant dimensions (z-axis movements) from - aux_achieved field. action_wrapper (None|class): Some times primitive joints is not wanted, e.g., has redundant dimensions or offset. If not None, this is used to transform the agent actions. See ActionWrapper of gazebo_agent.py for example. @@ -96,8 +90,6 @@ def __init__(self, self._vocab_sequence_length = vocab_sequence_length self._sentence_space = None self._goal_conditioned = goal_conditioned - self._use_aux_achieved = use_aux_achieved - self._xy_only_aux = xy_only_aux if config == None: # Load agent configurations @@ -188,27 +180,15 @@ def get_observation(self, teacher, sentence_raw="hello"): obs = self._create_observation_dict(teacher, sentence_raw) elif self._use_image_observation: # observation is pure image obs = self.get_camera_observation() + elif self._goal_conditioned: + obs = self._generate_goal_conditioned_obs(teacher) else: # observation is pure low-dimentional states obs = teacher.get_task_specific_observation(self) - if self._goal_conditioned: - flat_obs = obs - obs = OrderedDict() - obs['observation'] = flat_obs - obs['achieved_goal'] = flat_obs[6:8] - obs['desired_goal'] = flat_obs[12:14] - if self._use_aux_achieved: - obs['observation'] = flat_obs[14:] - obs['aux_achieved'] = np.concatenate( - (flat_obs[:6], flat_obs[8:12]), axis=0) - if self._xy_only_aux: - # 2: z-speed, 3, 4: angular velocity, 5: yaw-vel, 6, 7: x, y, 8: z, 9, 10, 11: pose - obs['observation'] = np.concatenate( - (flat_obs[2:5], flat_obs[8:11], flat_obs[14:]), - axis=0) - obs['aux_achieved'] = np.concatenate( - (flat_obs[0:2], np.expand_dims(flat_obs[5], 0), - np.expand_dims(flat_obs[11], 0)), - axis=0) + return obs + + def _generate_goal_conditioned_obs(self, teacher): + tasks = teacher.get_task_groups()[0].get_tasks() + obs = tasks[0].generate_goal_conditioned_obs(self) return obs def get_camera_observation(self): @@ -343,7 +323,7 @@ def _construct_dict_space(self, obs_sample): ob_space = gym.spaces.Dict(ob_space_dict) return ob_space - def _construct_goal_condi_space(self, obs_sample, teacher): + def _assert_goal_conditioned(self, teacher): assert self.type == 'pioneer2dx_noplugin' assert not self._with_language assert not self._use_image_observation @@ -352,38 +332,17 @@ def _construct_goal_condi_space(self, obs_sample, teacher): assert len(tgs) == 1 tasks = tgs[0].get_tasks() assert len(tasks) == 1 and isinstance(tasks[0], GoalTask), str(tasks) - # state observation - goal_shape = 2 - goal_space = gym.spaces.Box( - low=-np.inf, high=np.inf, shape=(goal_shape, ), dtype=np.float32) - observation_space = gym.spaces.Box( - low=-np.inf, - high=np.inf, - shape=obs_sample['observation'].shape, - dtype=np.float32) - d = OrderedDict( - observation=observation_space, - achieved_goal=goal_space, - desired_goal=goal_space) - if self._use_aux_achieved: - ob_shape = observation_space.shape[0] - aux_shape = 10 - if self._xy_only_aux: - aux_shape = 4 - ob_space = gym.spaces.Box( - low=-np.inf, - high=np.inf, - shape=(ob_shape - aux_shape - goal_shape * 2, ), - dtype=np.float32) - aux_space = gym.spaces.Box( + + def _construct_goal_condi_space(self, obs_sample, teacher): + self._assert_goal_conditioned(teacher) + d = OrderedDict() + for k, v in obs_sample.items(): + d[k] = gym.spaces.Box( low=-np.inf, high=np.inf, - shape=(aux_shape, ), + shape=(v.shape[0], ), dtype=np.float32) - d["observation"] = ob_space - d["aux_achieved"] = aux_space - observation_space = gym.spaces.Dict(**d) - return observation_space + return gym.spaces.Dict(**d) def setup_joints(self, agent, joints, agent_cfg): """ Setup the joints acrroding to agent configuration. diff --git a/python/social_bot/tasks.py b/python/social_bot/tasks.py index 473d6fc4..ef54d172 100644 --- a/python/social_bot/tasks.py +++ b/python/social_bot/tasks.py @@ -178,6 +178,8 @@ def __init__(self, 'coke_can', 'table', 'car_wheel', 'plastic_cup', 'beer' ], goal_conditioned=False, + use_aux_achieved=False, + xy_only_aux=False, multi_dim_reward=False, end_on_hitting_distraction=False, end_episode_after_success=False, @@ -214,6 +216,10 @@ def __init__(self, goal_name (string): name of the goal in the world distraction_list (list of string): a list of model. the model shoud be in gazebo database goal_conditioned (bool): if True, each step has -1 reward, unless at goal state, which gives 0. + use_aux_achieved (bool): if True, pull out speed, pose dimensions into a separate + field: aux_achieved. Only valid when goal_conditioned is True. + xy_only_aux (bool): exclude irrelevant dimensions (z-axis movements) from + aux_achieved field. multi_dim_reward (bool): if True, separate goal reward and distraction penalty into two dimensions. end_episode_after_success (bool): if True, the episode will end once the goal is reached. A True value of this flag will overwrite the effects of flags ``switch_goal_within_episode`` and ``move_goal_during_episode``. @@ -269,6 +275,8 @@ def __init__(self, env=env, max_steps=max_steps, reward_weight=reward_weight) self._goal_name = goal_name self._goal_conditioned = goal_conditioned + self._use_aux_achieved = use_aux_achieved + self._xy_only_aux = xy_only_aux self._multi_dim_reward = multi_dim_reward self.end_on_hitting_distraction = end_on_hitting_distraction self._end_episode_after_success = end_episode_after_success @@ -686,6 +694,26 @@ def set_goal_name(self, goal_name): logging.debug('Setting Goal to %s', goal_name) self._goal_name = goal_name + def generate_goal_conditioned_obs(self, agent): + flat_obs = self.task_specific_observation(agent) + obs = OrderedDict() + obs['observation'] = flat_obs + obs['achieved_goal'] = flat_obs[6:8] + obs['desired_goal'] = flat_obs[12:14] + if self._use_aux_achieved: + obs['observation'] = flat_obs[14:] + obs['aux_achieved'] = np.concatenate( + (flat_obs[:6], flat_obs[8:12]), axis=0) + if self._xy_only_aux: + # 2: z-speed, 3, 4: angular velocity, 5: yaw-vel, 6, 7: x, y, 8: z, 9, 10, 11: pose + obs['observation'] = np.concatenate( + (flat_obs[2:5], flat_obs[8:11], flat_obs[14:]), axis=0) + obs['aux_achieved'] = np.concatenate( + (flat_obs[0:2], np.expand_dims(flat_obs[5], 0), + np.expand_dims(flat_obs[11], 0)), + axis=0) + return obs + def task_specific_observation(self, agent): """ Args: From dba09315cb651c9feeadc489cfea71b6f1b70a90 Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Tue, 15 Sep 2020 16:01:51 -0700 Subject: [PATCH 23/53] use more readable states to generate goal conditioned output --- python/social_bot/tasks.py | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/python/social_bot/tasks.py b/python/social_bot/tasks.py index ef54d172..b79233fa 100644 --- a/python/social_bot/tasks.py +++ b/python/social_bot/tasks.py @@ -697,20 +697,26 @@ def set_goal_name(self, goal_name): def generate_goal_conditioned_obs(self, agent): flat_obs = self.task_specific_observation(agent) obs = OrderedDict() + agent_pose = np.array(agent.get_pose()).flatten() + agent_vel = np.array(agent.get_velocities()).flatten() + goal = self._world.get_model(self._goal_name) + goal_pose = np.array(goal.get_pose()[0]).flatten() obs['observation'] = flat_obs - obs['achieved_goal'] = flat_obs[6:8] - obs['desired_goal'] = flat_obs[12:14] + obs['achieved_goal'] = agent_pose[0:2] + obs['desired_goal'] = goal_pose[0:2] if self._use_aux_achieved: + # distraction objects' x, y coordinates obs['observation'] = flat_obs[14:] - obs['aux_achieved'] = np.concatenate( - (flat_obs[:6], flat_obs[8:12]), axis=0) + obs['aux_achieved'] = np.concatenate((agent_vel, agent_pose[2:]), + axis=0) if self._xy_only_aux: - # 2: z-speed, 3, 4: angular velocity, 5: yaw-vel, 6, 7: x, y, 8: z, 9, 10, 11: pose + # agent speed: 2: z-speed; 3, 4: angular velocities; 5: yaw-vel, + # agent pose: 2: z; 3, 4, 5: roll pitch yaw. obs['observation'] = np.concatenate( - (flat_obs[2:5], flat_obs[8:11], flat_obs[14:]), axis=0) + (agent_vel[2:5], agent_pose[2:5], flat_obs[14:]), axis=0) obs['aux_achieved'] = np.concatenate( - (flat_obs[0:2], np.expand_dims(flat_obs[5], 0), - np.expand_dims(flat_obs[11], 0)), + (agent_vel[0:2], np.expand_dims(agent_vel[5], 0), + np.expand_dims(agent_pose[5], 0)), axis=0) return obs From b7d0ab4e9cdea8c3902775193d1e285d33bdfb23 Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Wed, 16 Sep 2020 12:59:05 -0700 Subject: [PATCH 24/53] address comments from JT. --- python/social_bot/gazebo_agent.py | 2 +- python/social_bot/tasks.py | 74 ++++++++++++++----------------- 2 files changed, 34 insertions(+), 42 deletions(-) diff --git a/python/social_bot/gazebo_agent.py b/python/social_bot/gazebo_agent.py index 062da801..88cb8eca 100644 --- a/python/social_bot/gazebo_agent.py +++ b/python/social_bot/gazebo_agent.py @@ -253,6 +253,7 @@ def get_observation_space(self, teacher): """ obs_sample = self.get_observation(teacher) if self._goal_conditioned: + self._assert_goal_conditioned(teacher) observation_space = self._construct_goal_condi_space( obs_sample, teacher) elif self._with_language or self._image_with_internal_states: @@ -334,7 +335,6 @@ def _assert_goal_conditioned(self, teacher): assert len(tasks) == 1 and isinstance(tasks[0], GoalTask), str(tasks) def _construct_goal_condi_space(self, obs_sample, teacher): - self._assert_goal_conditioned(teacher) d = OrderedDict() for k, v in obs_sample.items(): d[k] = gym.spaces.Box( diff --git a/python/social_bot/tasks.py b/python/social_bot/tasks.py index b79233fa..4acf9e57 100644 --- a/python/social_bot/tasks.py +++ b/python/social_bot/tasks.py @@ -745,49 +745,41 @@ def task_specific_observation(self, agent): pose = np.concatenate((pose, obj_pos), axis=0) agent_pose = np.array(agent.get_pose()).flatten() - if self._use_full_states or self._use_egocentric_states: - yaw = agent_pose[5] - # adds egocentric velocity input - vx, vy, vz, a1, a2, a3 = np.array(agent.get_velocities()).flatten() + yaw = agent_pose[5] + # adds egocentric velocity input + vx, vy, vz, a1, a2, a3 = np.array(agent.get_velocities()).flatten() + if self._use_egocentric_states: + rvx, rvy = agent.get_egocentric_cord_2d(vx, vy, yaw) + else: + rvx, rvy = vx, vy + obs = [rvx, rvy, vz, a1, a2, a3] + list(agent_pose) + # adds objects' (goal's as well as distractions') egocentric + # coordinates to observation + while len(pose) > 1: + x = pose[0] + y = pose[1] if self._use_egocentric_states: - rvx, rvy = agent.get_egocentric_cord_2d(vx, vy, yaw) + x = pose[0] - agent_pose[0] + y = pose[1] - agent_pose[1] + rotated_x, rotated_y = agent.get_egocentric_cord_2d(x, y, yaw) else: - rvx, rvy = vx, vy - obs = [rvx, rvy, vz, a1, a2, a3] + list(agent_pose) - # adds objects' (goal's as well as distractions') egocentric - # coordinates to observation - while len(pose) > 1: - x = pose[0] - y = pose[1] - if self._use_egocentric_states: - x = pose[0] - agent_pose[0] - y = pose[1] - agent_pose[1] - rotated_x, rotated_y = agent.get_egocentric_cord_2d( - x, y, yaw) - else: - rotated_x, rotated_y = x, y - if (self._use_egocentric_states - and self._egocentric_perception_range > 0): - dist = math.sqrt(rotated_x * rotated_x + - rotated_y * rotated_y) - rotated_x /= dist - rotated_y /= dist - magnitude = 1. / dist - if rotated_x < np.cos( - self._egocentric_perception_range / 180. * np.pi): - rotated_x = 0. - rotated_y = 0. - magnitude = 0. - obs.extend([rotated_x, rotated_y, magnitude]) - else: - obs.extend([rotated_x, rotated_y]) - pose = pose[3:] - obs = np.array(obs) - else: - agent_vel = np.array(agent.get_velocities()).flatten() - joints_states = agent.get_internal_states() - obs = np.concatenate((pose, agent_pose, agent_vel, joints_states), - axis=0) + rotated_x, rotated_y = x, y + if (self._use_egocentric_states + and self._egocentric_perception_range > 0): + dist = math.sqrt(rotated_x * rotated_x + rotated_y * rotated_y) + rotated_x /= dist + rotated_y /= dist + magnitude = 1. / dist + if rotated_x < np.cos( + self._egocentric_perception_range / 180. * np.pi): + rotated_x = 0. + rotated_y = 0. + magnitude = 0. + obs.extend([rotated_x, rotated_y, magnitude]) + else: + obs.extend([rotated_x, rotated_y]) + pose = pose[3:] + obs = np.array(obs) return obs From da45b969e51521297ad05587eb20414b1c1a9f21 Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Wed, 16 Sep 2020 23:52:57 -0700 Subject: [PATCH 25/53] remove unused var use_full_states --- python/social_bot/tasks.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/python/social_bot/tasks.py b/python/social_bot/tasks.py index 4acf9e57..4b07f22f 100644 --- a/python/social_bot/tasks.py +++ b/python/social_bot/tasks.py @@ -206,7 +206,6 @@ def __init__(self, move_goal_during_episode=True, success_with_angle_requirement=True, additional_observation_list=[], - use_full_states=False, use_egocentric_states=False, egocentric_perception_range=0): """ @@ -260,7 +259,6 @@ def __init__(self, move_goal_during_episode (bool): if True, the goal will be moved during episode, when it has been achieved success_with_angle_requirement: if True then calculate the reward considering the angular requirement additional_observation_list: a list of additonal objects to be added - use_full_states (bool): For non-image observation, use full states of the world use_egocentric_states (bool): For the non-image observation case, use the states transformed to egocentric coordinate, e.g., agent's egocentric distance and direction to goal egocentric_perception_range (float): the max range in degree to limit the agent's observation. @@ -317,7 +315,6 @@ def __init__(self, self._max_play_ground_size))) self._pos_list.remove((0, 0)) self._polar_coord = polar_coord - self._use_full_states = use_full_states self._use_egocentric_states = use_egocentric_states self._egocentric_perception_range = egocentric_perception_range if self.should_use_curriculum_training(): @@ -339,13 +336,13 @@ def __init__(self, self._random_range = random_range self._goal_dist = 0. obs_format = "image" - if use_full_states or use_egocentric_states: - obs_format = "full_state" obs_relative = "ego" - if use_full_states and not use_egocentric_states: + if use_egocentric_states: + obs_format = "full_state" + else: obs_relative = "absolute" logging.info("Observations: {}, {}.".format(obs_format, obs_relative)) - if use_full_states and not use_egocentric_states: + if not use_egocentric_states: logging.info( "Dims: 0-5: agent's velocity and angular " + "velocity, 6-11: agent's position and pose, 12-13: goal x, y" + From 19ce0c7d1bdf9797427fdf58d2fe33ad892ad221 Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Mon, 5 Oct 2020 10:51:01 -0700 Subject: [PATCH 26/53] random agent start position in GoalTask. Fix initial_dist and prev_dist inaccurate due to distraction objects also counted in. --- .../social_bot/models/target_ball/model.sdf | 2 +- python/social_bot/tasks.py | 22 ++++++++++++++----- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/python/social_bot/models/target_ball/model.sdf b/python/social_bot/models/target_ball/model.sdf index 7e5a742a..0e80a542 100644 --- a/python/social_bot/models/target_ball/model.sdf +++ b/python/social_bot/models/target_ball/model.sdf @@ -8,7 +8,7 @@ - 0.03 + 0.1 diff --git a/python/social_bot/tasks.py b/python/social_bot/tasks.py index 4b07f22f..e2eb01ad 100644 --- a/python/social_bot/tasks.py +++ b/python/social_bot/tasks.py @@ -188,6 +188,7 @@ def __init__(self, fail_distance_thresh=2.0, distraction_penalty_distance_thresh=0, distraction_penalty=0.5, + random_agent_position=False, random_agent_orientation=False, sparse_reward=True, random_range=5.0, @@ -233,6 +234,7 @@ def __init__(self, facing it is considered hitting a distraction) distraction_penalty (float): positive float of how much to penalize getting too close to distraction objects + random_agent_position (bool): whether randomize the position of the agent at beginning of episode. random_agent_orientation (bool): whether randomize the orientation (yaw) of the agent at the beginning of an episode. sparse_reward (bool): if true, the reward is -1/0/1, otherwise the 0 case will be replaced @@ -286,6 +288,7 @@ def __init__(self, assert distraction_penalty_distance_thresh < success_distance_thresh self._distraction_penalty = distraction_penalty self._sparse_reward = sparse_reward + self._random_agent_position = random_agent_position self._random_agent_orientation = random_agent_orientation self._use_curriculum_training = use_curriculum_training self._curriculum_distractions = curriculum_distractions @@ -431,10 +434,16 @@ def run(self): """ Start a teaching episode for this task. """ agent_sentence = yield self._agent.reset() - if self._random_agent_orientation: + if self._random_agent_orientation or self._random_agent_position: loc, agent_dir = self._agent.get_pose() - self._agent.set_pose((loc, (agent_dir[0], agent_dir[1], - 2 * math.pi * random.random()))) + if self._random_agent_position: + loc = (self._max_play_ground_size * (1 - 2 * random.random()), + self._max_play_ground_size * (1 - 2 * random.random()), + loc[2]) + if self._random_agent_orientation: + agent_dir = (agent_dir[0], agent_dir[1], + 2 * math.pi * random.random()) + self._agent.set_pose((loc, agent_dir)) a_loc, a_dir = self._get_agent_loc() self._random_move_objects() self.pick_goal() @@ -639,7 +648,8 @@ def _move_obj(self, loc = np.asarray((random.random() * range - range / 2, random.random() * range - range / 2, 0)) - self._initial_dist = np.linalg.norm(loc - agent_loc) + if is_goal: + self._initial_dist = np.linalg.norm(loc - agent_loc) satisfied = True if (abs(loc[0]) > self._max_play_ground_size or abs(loc[1]) > self._max_play_ground_size): # not within walls @@ -658,7 +668,8 @@ def _move_obj(self, str(agent_loc), str(range), str(self._max_play_ground_size))) break - self._prev_dist = self._initial_dist + if is_goal: + self._prev_dist = self._initial_dist obj.reset() obj.set_pose((loc, (0, 0, 0))) return loc, dist @@ -704,6 +715,7 @@ def generate_goal_conditioned_obs(self, agent): if self._use_aux_achieved: # distraction objects' x, y coordinates obs['observation'] = flat_obs[14:] + # 0, 1, 2: vel; 3, 4, 5: angular vel; 6: z position; 7, 8, 9: roll pitch yaw obs['aux_achieved'] = np.concatenate((agent_vel, agent_pose[2:]), axis=0) if self._xy_only_aux: From 47b2ee9765033763acb7150bdab0125bd2da83c2 Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Fri, 30 Oct 2020 22:14:40 -0700 Subject: [PATCH 27/53] more debug printing of agent states at episode end --- python/social_bot/tasks.py | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/python/social_bot/tasks.py b/python/social_bot/tasks.py index e2eb01ad..d1d54284 100644 --- a/python/social_bot/tasks.py +++ b/python/social_bot/tasks.py @@ -428,7 +428,7 @@ def _get_goal_dist(self, goal): dir = np.array([math.cos(agent_dir[2]), math.sin(agent_dir[2])]) goal_dir = (goal_loc[0:2] - loc[0:2]) / dist dot = sum(dir * goal_dir) - return dist, dot, loc + return dist, dot, loc, agent_dir def run(self): """ Start a teaching episode for this task. """ @@ -454,7 +454,7 @@ def run(self): rewards = None # reward array in multi_dim_reward case while steps_since_last_reward < self._max_steps: steps_since_last_reward += 1 - dist, dot, loc = self._get_goal_dist(goal) + dist, dot, loc, agent_dir = self._get_goal_dist(goal) distraction_penalty, prev_min_dist_to_distraction = ( self._get_distraction_penalty(loc, dot, prev_min_dist_to_distraction)) @@ -524,7 +524,7 @@ def run(self): done=done, rewards=rewards) reward = -1.0 - dist, dot, loc = self._get_goal_dist(goal) + dist, dot, loc, agent_dir = self._get_goal_dist(goal) distraction_penalty, prev_min_dist_to_distraction = ( self._get_distraction_penalty(loc, dot, prev_min_dist_to_distraction)) @@ -546,7 +546,16 @@ def run(self): self._push_reward_queue(0) logging.debug("took more than {} steps".format( str(self._max_steps))) - logging.debug("yielding reward: {}".format(str(reward))) + agent_vel = np.array(self._agent.get_velocities()).flatten() + + def _str(arr): + res = ["["] + res.extend(["{:.2f}".format(e) for e in arr]) + res.append("]") + return " ".join(res) + + logging.debug("yielding reward: %s at\nloc %s pose %s vel %s", + str(reward), _str(loc), _str(agent_dir), _str(agent_vel)) if self.should_use_curriculum_training(): logging.debug("reward queue len: {}, sum: {}".format( str(len(self._q)), str(sum(self._q)))) From 200368d0049c86d96d62af0e297dc36fbdc03d9f Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Fri, 6 Nov 2020 14:19:16 -0800 Subject: [PATCH 28/53] add short walls to enclose playground --- python/social_bot/worlds/play_ground.world | 31 ++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/python/social_bot/worlds/play_ground.world b/python/social_bot/worlds/play_ground.world index 1c80a0a8..684849e3 100644 --- a/python/social_bot/worlds/play_ground.world +++ b/python/social_bot/worlds/play_ground.world @@ -40,6 +40,13 @@ -2 5.7 0 0 -0 0 + + 1 + + model://grey_wall + + 2 5.7 0 0 -0 0 + 1 @@ -47,6 +54,13 @@ -2 -5.7 0 0 -0 0 + + 1 + + model://grey_wall + + 2 -5.7 0 0 -0 0 + 1 @@ -61,5 +75,22 @@ -5.7 -2 0 0 -0 1.5708 + + 1 + 5.7 0 0.1 0 0 0 + + 0 0 0 0 0 0 + + + 0.2 11.4 0.2 + + + + + 0.2 11.4 0.2 + + + + From 593135abead677af29956f7b495fb516642691ac Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Wed, 11 Nov 2020 14:50:21 -0800 Subject: [PATCH 29/53] allow chaining goaltask together with probability, also avoids resetting goals too close to distractions. --- python/social_bot/tasks.py | 59 +++++++++++++++++++++++++++----------- 1 file changed, 43 insertions(+), 16 deletions(-) diff --git a/python/social_bot/tasks.py b/python/social_bot/tasks.py index d1d54284..dc872770 100644 --- a/python/social_bot/tasks.py +++ b/python/social_bot/tasks.py @@ -184,6 +184,7 @@ def __init__(self, end_on_hitting_distraction=False, end_episode_after_success=False, reset_time_limit_on_success=True, + chain_task_rate=0, success_distance_thresh=0.5, fail_distance_thresh=2.0, distraction_penalty_distance_thresh=0, @@ -226,6 +227,10 @@ def __init__(self, end_on_hitting_distraction (bool): whether to end episode on hitting distraction reset_time_limit_on_success (bool): if not ending after success, if hit success before time limit, reset clock to 0. + chain_task_rate (float): if positive, with this much probability, the current task will be chained together + with the next task: after one tasks finishes, episode doesn't end, but keeps onto the second goal/task. + If the first task isn't achieved within max_steps, episode ends. When the first task is achieved, + step_type is MID and no goal reward is given. Reward is given only after achieving all chained tasks. success_distance_thresh (float): the goal is reached if it's within this distance to the agent fail_distance_thresh (float): if the agent moves away from the goal more than this distance, it's considered a failure and is given reward -1 @@ -281,6 +286,7 @@ def __init__(self, self.end_on_hitting_distraction = end_on_hitting_distraction self._end_episode_after_success = end_episode_after_success self._reset_time_limit_on_success = reset_time_limit_on_success + self._chain_task_rate = chain_task_rate self._success_distance_thresh = success_distance_thresh self._fail_distance_thresh = fail_distance_thresh self._distraction_penalty_distance_thresh = distraction_penalty_distance_thresh @@ -444,11 +450,24 @@ def run(self): agent_dir = (agent_dir[0], agent_dir[1], 2 * math.pi * random.random()) self._agent.set_pose((loc, agent_dir)) - a_loc, a_dir = self._get_agent_loc() self._random_move_objects() self.pick_goal() goal = self._world.get_model(self._goal_name) - self._move_goal(goal, a_loc, a_dir) + done = False + gen = self._run_one_goal(goal) + while not done: + action = next(gen) + if (action.done and self._chain_task_rate > 0 + and random.random() < self._chain_task_rate): + action.done = False + gen = self._run_one_goal(goal, move_distractions=False) + done = action.done + yield action + + def _run_one_goal(self, goal, move_distractions=True): + """Generator function of task feedback.""" + a_loc, a_dir = self._get_agent_loc() + self._move_goal(goal, a_loc, a_dir, move_distractions) steps_since_last_reward = 0 prev_min_dist_to_distraction = 100 rewards = None # reward array in multi_dim_reward case @@ -596,11 +615,18 @@ def _get_distraction_penalty(self, agent_loc, dot, prev_min_dist_to_distraction = curr_min_dist return distraction_penalty, prev_min_dist_to_distraction - def _move_goal(self, goal, agent_loc, agent_dir): + def _move_goal(self, goal, agent_loc, agent_dir, move_distractions=True): """ Move goal as well as all distraction objects to a random location. """ avoid_locations = [agent_loc] + if not move_distractions: + for item in self._distraction_list: + if item is not self._goal_name: + obj = self._world.get_model(item) + if obj: + distraction_loc = obj.get_pose()[0] + avoid_locations.append(distraction_loc) loc, dist = self._move_obj( obj=goal, agent_loc=agent_loc, @@ -610,19 +636,20 @@ def _move_goal(self, goal, agent_loc, agent_dir): self._goal_dist += dist avoid_locations.append(loc) distractions = OrderedDict() - for item in self._distraction_list: - if item is not self._goal_name: - distractions[item] = 1 - if len(distractions) and self._curriculum_distractions: - for item, _ in distractions.items(): - distraction = self._world.get_agent(item) - loc, _ = self._move_obj( - obj=distraction, - agent_loc=agent_loc, - agent_dir=agent_dir, - is_goal=False, - avoid_locations=avoid_locations) - avoid_locations.append(loc) + if move_distractions: + for item in self._distraction_list: + if item is not self._goal_name: + distractions[item] = 1 + if len(distractions) and self._curriculum_distractions: + for item, _ in distractions.items(): + distraction = self._world.get_agent(item) + loc, _ = self._move_obj( + obj=distraction, + agent_loc=agent_loc, + agent_dir=agent_dir, + is_goal=False, + avoid_locations=avoid_locations) + avoid_locations.append(loc) def _move_obj(self, obj, From 9734ffb7e08ebb564f1ae57a5998d273f582631d Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Tue, 12 Jan 2021 21:58:18 -0800 Subject: [PATCH 30/53] allow setting min_distance of goal to agent's location, so now goal range is uniformly random within random_range (max) and the min. --- python/social_bot/tasks.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/python/social_bot/tasks.py b/python/social_bot/tasks.py index dc872770..0eb7bfd1 100644 --- a/python/social_bot/tasks.py +++ b/python/social_bot/tasks.py @@ -193,6 +193,7 @@ def __init__(self, random_agent_orientation=False, sparse_reward=True, random_range=5.0, + min_distance=0, polar_coord=True, random_goal=False, use_curriculum_training=False, @@ -245,6 +246,8 @@ def __init__(self, sparse_reward (bool): if true, the reward is -1/0/1, otherwise the 0 case will be replaced with normalized distance the agent get closer to goal. random_range (float): the goal's random position range + min_distance (float): the goal must be this minimum distance away from avoided locations. If this is smaller + than the success_distance_thresh, then success_distance_thresh is used instead. polar_coord (bool): use cartesian coordinates in random_range, otherwise, use polar coord. random_goal (bool): if True, teacher will randomly select goal from the object list each episode use_curriculum_training (bool): when true, use curriculum in goal task training @@ -343,6 +346,9 @@ def __init__(self, self._reward_thresh_to_increase_range) else: self._random_range = random_range + if min_distance < self._success_distance_thresh: + min_distance = self._success_distance_thresh + self._min_distance = min_distance self._goal_dist = 0. obs_format = "image" obs_relative = "ego" @@ -457,7 +463,7 @@ def run(self): gen = self._run_one_goal(goal) while not done: action = next(gen) - if (action.done and self._chain_task_rate > 0 + if (action.done and action.success and self._chain_task_rate > 0 and random.random() < self._chain_task_rate): action.done = False gen = self._run_one_goal(goal, move_distractions=False) @@ -692,7 +698,7 @@ def _move_obj(self, satisfied = False for avoid_loc in avoid_locations: dist = np.linalg.norm(loc - avoid_loc) - if dist < self._success_distance_thresh: + if dist < self._min_distance: satisfied = False break if satisfied or attempts > 10000: From 5c98ed08a885f091489644bc1f2a24e252fc7e36 Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Thu, 21 Jan 2021 14:21:31 -0800 Subject: [PATCH 31/53] allow using speed_goal (besides position goal) in GoalTask. --- python/social_bot/tasks.py | 36 +++++++++++++++++++++++++++--------- 1 file changed, 27 insertions(+), 9 deletions(-) diff --git a/python/social_bot/tasks.py b/python/social_bot/tasks.py index 0eb7bfd1..e3297bf7 100644 --- a/python/social_bot/tasks.py +++ b/python/social_bot/tasks.py @@ -178,6 +178,7 @@ def __init__(self, 'coke_can', 'table', 'car_wheel', 'plastic_cup', 'beer' ], goal_conditioned=False, + speed_goal=False, use_aux_achieved=False, xy_only_aux=False, multi_dim_reward=False, @@ -218,6 +219,7 @@ def __init__(self, goal_name (string): name of the goal in the world distraction_list (list of string): a list of model. the model shoud be in gazebo database goal_conditioned (bool): if True, each step has -1 reward, unless at goal state, which gives 0. + speed_goal (bool): if True, use speed pose etc. together with position as part of goal. use_aux_achieved (bool): if True, pull out speed, pose dimensions into a separate field: aux_achieved. Only valid when goal_conditioned is True. xy_only_aux (bool): exclude irrelevant dimensions (z-axis movements) from @@ -283,6 +285,7 @@ def __init__(self, env=env, max_steps=max_steps, reward_weight=reward_weight) self._goal_name = goal_name self._goal_conditioned = goal_conditioned + self._speed_goal = speed_goal self._use_aux_achieved = use_aux_achieved self._xy_only_aux = xy_only_aux self._multi_dim_reward = multi_dim_reward @@ -639,6 +642,15 @@ def _move_goal(self, goal, agent_loc, agent_dir, move_distractions=True): agent_dir=agent_dir, is_goal=True, avoid_locations=avoid_locations) + if self._speed_goal: + MAX_SPEED = 4 * 0.8 # -2 to 2 + xspeed = (0.5 - random.random()) * MAX_SPEED + yspeed = (0.5 - random.random()) * MAX_SPEED + yawspeed = (0.5 - random.random()) * MAX_SPEED + yaw = np.arctan2(yspeed, xspeed) + # 0, 1, 2: vel; 3, 4, 5: angular vel; 6: z position; 7, 8, 9: roll pitch yaw + self._aux_desired = np.array( + [xspeed, yspeed, 0, 0, 0, yawspeed, 0, 0, 0, yaw]) self._goal_dist += dist avoid_locations.append(loc) distractions = OrderedDict() @@ -760,15 +772,21 @@ def generate_goal_conditioned_obs(self, agent): # 0, 1, 2: vel; 3, 4, 5: angular vel; 6: z position; 7, 8, 9: roll pitch yaw obs['aux_achieved'] = np.concatenate((agent_vel, agent_pose[2:]), axis=0) - if self._xy_only_aux: - # agent speed: 2: z-speed; 3, 4: angular velocities; 5: yaw-vel, - # agent pose: 2: z; 3, 4, 5: roll pitch yaw. - obs['observation'] = np.concatenate( - (agent_vel[2:5], agent_pose[2:5], flat_obs[14:]), axis=0) - obs['aux_achieved'] = np.concatenate( - (agent_vel[0:2], np.expand_dims(agent_vel[5], 0), - np.expand_dims(agent_pose[5], 0)), - axis=0) + if self._speed_goal: + obs['achieved_goal'] = np.concatenate( + (obs['achieved_goal'], obs['aux_achieved']), axis=0) + del obs['aux_achieved'] + obs['desired_goal'] = np.concatenate( + (obs['desired_goal'], self._aux_desired), axis=0) + # if self._xy_only_aux: + # # agent speed: 2: z-speed; 3, 4: angular velocities; 5: yaw-vel, + # # agent pose: 2: z; 3, 4, 5: roll pitch yaw. + # obs['observation'] = np.concatenate( + # (agent_vel[2:5], agent_pose[2:5], flat_obs[14:]), axis=0) + # obs['aux_achieved'] = np.concatenate( + # (agent_vel[0:2], np.expand_dims(agent_vel[5], 0), + # np.expand_dims(agent_pose[5], 0)), + # axis=0) return obs def task_specific_observation(self, agent): From e454aee0a3b8b62b7716e046cf9b5aa2d539073e Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Tue, 26 Jan 2021 10:06:55 -0800 Subject: [PATCH 32/53] allow setting speed limit in speed goal task, refactor --- python/social_bot/tasks.py | 43 ++++++++++++++++++++++++++------------ 1 file changed, 30 insertions(+), 13 deletions(-) diff --git a/python/social_bot/tasks.py b/python/social_bot/tasks.py index e3297bf7..94103849 100644 --- a/python/social_bot/tasks.py +++ b/python/social_bot/tasks.py @@ -179,6 +179,7 @@ def __init__(self, ], goal_conditioned=False, speed_goal=False, + speed_goal_limit=1.6, use_aux_achieved=False, xy_only_aux=False, multi_dim_reward=False, @@ -220,6 +221,7 @@ def __init__(self, distraction_list (list of string): a list of model. the model shoud be in gazebo database goal_conditioned (bool): if True, each step has -1 reward, unless at goal state, which gives 0. speed_goal (bool): if True, use speed pose etc. together with position as part of goal. + speed_goal_limit (float): randomly sample speed goal in the range: -limit to +limit. use_aux_achieved (bool): if True, pull out speed, pose dimensions into a separate field: aux_achieved. Only valid when goal_conditioned is True. xy_only_aux (bool): exclude irrelevant dimensions (z-axis movements) from @@ -286,6 +288,7 @@ def __init__(self, self._goal_name = goal_name self._goal_conditioned = goal_conditioned self._speed_goal = speed_goal + self._speed_goal_limit = speed_goal_limit self._use_aux_achieved = use_aux_achieved self._xy_only_aux = xy_only_aux self._multi_dim_reward = multi_dim_reward @@ -434,11 +437,26 @@ def _prepare_teacher_action(self, def _within_angle(self, dot): return (not self._success_with_angle_requirement) or dot > 0.707 + def _get_agent_aux_dims(self, agent_pose=None, agent_vel=None): + if agent_pose is None: + agent_pose = np.array(self._agent.get_pose()).flatten() + if agent_vel is None: + agent_vel = np.array(self._agent.get_velocities()).flatten() + # 0, 1, 2: vel; 3, 4, 5: angular vel; 6: z position; 7, 8, 9: roll pitch yaw + return np.concatenate((agent_vel, agent_pose[2:]), axis=0) + def _get_goal_dist(self, goal): loc, agent_dir = self._get_agent_loc() goal_loc, _ = goal.get_pose() goal_loc = np.array(goal_loc) - dist = np.linalg.norm(loc - goal_loc) + _goal_loc = goal_loc.copy() + _loc = np.array(loc).copy() + if self._speed_goal: + _goal_loc = np.concatenate((_goal_loc, self._aux_desired), axis=0) + aux = self._get_agent_aux_dims() + _loc = np.concatenate((_loc, aux), axis=0) + + dist = np.linalg.norm(_loc - _goal_loc) # dir from get_pose is (roll, pitch, yaw) dir = np.array([math.cos(agent_dir[2]), math.sin(agent_dir[2])]) goal_dir = (goal_loc[0:2] - loc[0:2]) / dist @@ -643,11 +661,12 @@ def _move_goal(self, goal, agent_loc, agent_dir, move_distractions=True): is_goal=True, avoid_locations=avoid_locations) if self._speed_goal: - MAX_SPEED = 4 * 0.8 # -2 to 2 + MAX_SPEED = self._speed_goal_limit * 2 xspeed = (0.5 - random.random()) * MAX_SPEED yspeed = (0.5 - random.random()) * MAX_SPEED yawspeed = (0.5 - random.random()) * MAX_SPEED - yaw = np.arctan2(yspeed, xspeed) + yaw = np.arctan2( + yspeed, xspeed) # numpy.arctan2 is defined as arctan(x0/x1) # 0, 1, 2: vel; 3, 4, 5: angular vel; 6: z position; 7, 8, 9: roll pitch yaw self._aux_desired = np.array( [xspeed, yspeed, 0, 0, 0, yawspeed, 0, 0, 0, yaw]) @@ -766,18 +785,16 @@ def generate_goal_conditioned_obs(self, agent): obs['observation'] = flat_obs obs['achieved_goal'] = agent_pose[0:2] obs['desired_goal'] = goal_pose[0:2] - if self._use_aux_achieved: + aux = self._get_agent_aux_dims(agent_pose, agent_vel) + if self._speed_goal: + obs['achieved_goal'] = np.concatenate((obs['achieved_goal'], aux), + axis=0) + obs['desired_goal'] = np.concatenate( + (obs['desired_goal'], self._aux_desired), axis=0) + elif self._use_aux_achieved: # distraction objects' x, y coordinates obs['observation'] = flat_obs[14:] - # 0, 1, 2: vel; 3, 4, 5: angular vel; 6: z position; 7, 8, 9: roll pitch yaw - obs['aux_achieved'] = np.concatenate((agent_vel, agent_pose[2:]), - axis=0) - if self._speed_goal: - obs['achieved_goal'] = np.concatenate( - (obs['achieved_goal'], obs['aux_achieved']), axis=0) - del obs['aux_achieved'] - obs['desired_goal'] = np.concatenate( - (obs['desired_goal'], self._aux_desired), axis=0) + obs['aux_achieved'] = aux # if self._xy_only_aux: # # agent speed: 2: z-speed; 3, 4: angular velocities; 5: yaw-vel, # # agent pose: 2: z; 3, 4, 5: roll pitch yaw. From 0bdca8cc6246d733ce38b2af9fd452cd16732c9f Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Wed, 7 Apr 2021 15:01:06 -0700 Subject: [PATCH 33/53] more debug info for goal task to output states when reward achieved. --- python/social_bot/tasks.py | 38 +++++++++++++++++++++++++++----------- 1 file changed, 27 insertions(+), 11 deletions(-) diff --git a/python/social_bot/tasks.py b/python/social_bot/tasks.py index 94103849..9a3ae7cc 100644 --- a/python/social_bot/tasks.py +++ b/python/social_bot/tasks.py @@ -519,6 +519,8 @@ def _run_one_goal(self, goal, move_distractions=True): if self._multi_dim_reward: rewards = np.array([1, -distraction_penalty]) logging.debug("yielding reward: " + str(reward)) + logging.debug("at location: %s, aux: %s", a_loc.astype('|S5'), + self._get_agent_aux_dims().astype('|S5')) done = self._end_episode_after_success agent_sentence = yield self._prepare_teacher_action( reward=reward, @@ -659,7 +661,8 @@ def _move_goal(self, goal, agent_loc, agent_dir, move_distractions=True): agent_loc=agent_loc, agent_dir=agent_dir, is_goal=True, - avoid_locations=avoid_locations) + avoid_locations=avoid_locations, + name="goal") if self._speed_goal: MAX_SPEED = self._speed_goal_limit * 2 xspeed = (0.5 - random.random()) * MAX_SPEED @@ -685,7 +688,8 @@ def _move_goal(self, goal, agent_loc, agent_dir, move_distractions=True): agent_loc=agent_loc, agent_dir=agent_dir, is_goal=False, - avoid_locations=avoid_locations) + avoid_locations=avoid_locations, + name=item) avoid_locations.append(loc) def _move_obj(self, @@ -693,7 +697,8 @@ def _move_obj(self, agent_loc, agent_dir, is_goal=True, - avoid_locations=[]): + avoid_locations=[], + name="Unspecified"): if (self.should_use_curriculum_training() and self._percent_full_range_in_curriculum > 0 and random.random() < self._percent_full_range_in_curriculum): @@ -704,6 +709,7 @@ def _move_obj(self, self._is_full_range_in_curriculum = False attempts = 0 dist = range + _min_distance = self._min_distance while True: attempts += 1 dist = random.random() * range @@ -729,17 +735,26 @@ def _move_obj(self, satisfied = False for avoid_loc in avoid_locations: dist = np.linalg.norm(loc - avoid_loc) - if dist < self._min_distance: + if dist < _min_distance: satisfied = False break - if satisfied or attempts > 10000: + if satisfied or attempts % 10000 == 0 or attempts > 30000: if not satisfied: - logging.warning( - "Took forever to find satisfying " + - "object location. " + - "agent_loc: {}, range: {}, max_size: {}.".format( - str(agent_loc), str(range), - str(self._max_play_ground_size))) + if attempts <= 30000: + _min_distance /= 2. + logging.warning( + "Took {} times to find satisfying {} " + "location. reducing _min_dist to {}".format( + str(attempts), name, str(_min_distance))) + continue + else: + logging.warning( + "Took forever to find satisfying " + + "{} location. " + + "agent_loc: {}, range: {}, _min_dist: {}, max_size" + ": {}.".format(name, str(agent_loc), str(range), + str(_min_distance), + str(self._max_play_ground_size))) break if is_goal: self._prev_dist = self._initial_dist @@ -787,6 +802,7 @@ def generate_goal_conditioned_obs(self, agent): obs['desired_goal'] = goal_pose[0:2] aux = self._get_agent_aux_dims(agent_pose, agent_vel) if self._speed_goal: + obs['observation'] = flat_obs[14:] obs['achieved_goal'] = np.concatenate((obs['achieved_goal'], aux), axis=0) obs['desired_goal'] = np.concatenate( From 1b71a778094feea11c9ba524cea00bade37a75a0 Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Mon, 17 May 2021 11:52:26 -0700 Subject: [PATCH 34/53] add PushReachTask --- python/social_bot/envs/play_ground.py | 4 +- .../model.config | 18 ++ .../wood_cube_30cm_without_offset/model.sdf | 63 +++++++ python/social_bot/tasks.py | 175 +++++++++++++++++- 4 files changed, 250 insertions(+), 10 deletions(-) create mode 100644 python/social_bot/models/wood_cube_30cm_without_offset/model.config create mode 100644 python/social_bot/models/wood_cube_30cm_without_offset/model.sdf diff --git a/python/social_bot/envs/play_ground.py b/python/social_bot/envs/play_ground.py index 68ef5f0b..2f63bd4b 100644 --- a/python/social_bot/envs/play_ground.py +++ b/python/social_bot/envs/play_ground.py @@ -36,7 +36,7 @@ from social_bot.envs.gazebo_base import GazeboEnvBase from social_bot.teacher import TaskGroup from social_bot.teacher import TeacherAction -from social_bot.tasks import GoalTask, ICubAuxiliaryTask, KickingBallTask, Reaching3D +from social_bot.tasks import GoalTask, PushReachTask, ICubAuxiliaryTask, KickingBallTask, Reaching3D @gin.configurable @@ -173,7 +173,7 @@ def __init__(self, self._teacher = teacher.Teacher(task_groups_exclusive=False) self._has_goal_task = False for task in tasks: - if task == GoalTask: + if task in (GoalTask, PushReachTask): self._has_goal_task = True task_group = TaskGroup() task_group.add_task(task(env=self, max_steps=max_steps)) diff --git a/python/social_bot/models/wood_cube_30cm_without_offset/model.config b/python/social_bot/models/wood_cube_30cm_without_offset/model.config new file mode 100644 index 00000000..66ed48ec --- /dev/null +++ b/python/social_bot/models/wood_cube_30cm_without_offset/model.config @@ -0,0 +1,18 @@ + + + + Wood cube 30cm + 1.0 + model.sdf + + + Nate Koenig + nate@osrfoundation.org + + + + A 30cm^3 wooden cube modified from the 10x10x10 cm wooden cube. + The 5cm height offset of the internal link has been removed from the original version. + Also reduced mass by 0.3, and friction to 1/2. + + diff --git a/python/social_bot/models/wood_cube_30cm_without_offset/model.sdf b/python/social_bot/models/wood_cube_30cm_without_offset/model.sdf new file mode 100644 index 00000000..10db372c --- /dev/null +++ b/python/social_bot/models/wood_cube_30cm_without_offset/model.sdf @@ -0,0 +1,63 @@ + + + + + + 0.2167 + + 0.0008611666666666669 + 0 + 0 + 0.0008611666666666669 + 0 + 0.0008611666666666669 + + + + + + + 0.3 0.3 0.3 + + + + + + 0.347 + 8.8e+09 + + 100000 + 100 + 100.0 + 0.001 + + + + + 0.5 + 0 + 0.01 + + + + + + + + + 0.3 0.3 0.3 + + + + + + + + + + diff --git a/python/social_bot/tasks.py b/python/social_bot/tasks.py index 9a3ae7cc..d0dfb48d 100644 --- a/python/social_bot/tasks.py +++ b/python/social_bot/tasks.py @@ -317,14 +317,16 @@ def __init__(self, if random_goal and goal_name not in distraction_list: distraction_list.append(goal_name) self._distraction_list = distraction_list - self._object_list = distraction_list - if goal_name and goal_name not in distraction_list: + self._object_list = distraction_list.copy() + if goal_name and goal_name not in self._object_list: self._object_list.append(goal_name) self._goals = self._object_list self._move_goal_during_episode = move_goal_during_episode self._success_with_angle_requirement = success_with_angle_requirement if not additional_observation_list: additional_observation_list = self._object_list + else: + self._object_list.extend(additional_observation_list) self._additional_observation_list = additional_observation_list self._pos_list = list( itertools.product( @@ -422,7 +424,7 @@ def _prepare_teacher_action(self, goal_dist = 0. if rewards is not None: rewards = rewards.astype(np.float32) - if done: + if self._goal_dist > 0: goal_dist = self._goal_dist # clear self._goal_dist so it is only output once self._goal_dist = 0. @@ -712,7 +714,8 @@ def _move_obj(self, _min_distance = self._min_distance while True: attempts += 1 - dist = random.random() * range + dist = random.random() * (range - self._success_distance_thresh + ) + self._success_distance_thresh if self._curriculum_target_angle: angle_range = self._random_angle else: @@ -734,13 +737,13 @@ def _move_obj(self, self._max_play_ground_size): # not within walls satisfied = False for avoid_loc in avoid_locations: - dist = np.linalg.norm(loc - avoid_loc) - if dist < _min_distance: + ddist = np.linalg.norm(loc - avoid_loc) + if ddist < _min_distance: satisfied = False break - if satisfied or attempts % 10000 == 0 or attempts > 30000: + if satisfied or attempts % 100 == 0 or attempts > 300: if not satisfied: - if attempts <= 30000: + if attempts <= 300: _min_distance /= 2. logging.warning( "Took {} times to find satisfying {} " @@ -886,6 +889,162 @@ def task_specific_observation(self, agent): return obs +@gin.configurable +class PushReachTask(GoalTask): + def __init__(self, + env, + max_steps, + push_only=True, + obj_names=['wood_cube_30cm_without_offset'], + goal_names=['goal_indicator'], + distraction_list=['car_wheel']): + """A Push or Push and Reach task. + + We utilize some of the curriculum, distraction obj handling logic in GoalTask. + + Args: + push_only (bool): if True, goal positions are for objects to achieve; otherwise, + the first goal position is for the agent to achieve. + obj_names (list of string): when not empty, it's the names of the objects to be moved. + goal_names (list of string): when not empty, these goal objects indicate the goal locations for + the objects in obj_names. + """ + self._push_only = push_only + self._obj_names = obj_names + self._goal_names = goal_names + if push_only: + assert len(obj_names) == len(goal_names) + else: + assert len(obj_names) == len(goal_names) - 1 + super().__init__( + env, + max_steps, + goal_conditioned=True, + use_aux_achieved=True, + multi_dim_reward=True, + end_episode_after_success=True, + goal_name=goal_names[0], + distraction_list=distraction_list, + additional_observation_list=obj_names + goal_names[1:]) + + def _random_move_objects(self, random_range=10.0): + pass + + def _move_objs(self, obj_names, ap, ad, avoids=[], record_goal_dist=False): + """Move objects according to agent location.""" + obj_positions = avoids + if record_goal_dist: + self._goal_dist = 0 + n = 0 + for obj_name in obj_names: + obj = self._world.get_agent(obj_name) + p, dist = self._move_obj( + obj, ap, ad, avoid_locations=obj_positions, name=obj_name) + if record_goal_dist: + self._goal_dist += dist + n += 1 + obj_positions.append(p) + if record_goal_dist: + self._goal_dist /= n + return obj_positions + + def _move_goals(self, ap, ad, obj_locs=[]): + """Move goal locations according to object locations.""" + # TODO(lezhao): copy obj_locs into avoids and put ap in avoids, + # then find goal location based on each obj location. + return self._move_objs( + self._goal_names, ap, ad, obj_locs, record_goal_dist=True) + + def _move_distractions(self, ap, ad, avoids=[]): + return self._move_objs(self._distraction_list, ap, ad, avoids) + + def _get_obj_loc_pose(self, obj_names): + res_pos = np.array([]) + res_aux = np.array([]) + for obj_name in obj_names: + obj = self._world.get_agent(obj_name) + obj_pos, obj_dir = obj.get_pose() + res_pos = np.concatenate((res_pos, obj_pos[0:2])) + res_aux = np.concatenate((res_aux, obj_pos[2:], obj_dir)) + return res_pos, res_aux + + def _get_achieved(self): + achieved_loc = np.array([]) + aux_achieved = np.array(self._agent.get_velocities()).flatten() + ap, ad = self._get_agent_loc() + if self._push_only: + aux_achieved = np.concatenate((aux_achieved, ap, ad)) + else: + achieved_loc = np.concatenate((achieved_loc, ap[0:2])) + aux_achieved = np.concatenate((aux_achieved, ap[2:], ad)) + ach_poss, aux_achs = self._get_obj_loc_pose(self._obj_names) + achieved_loc = np.concatenate((achieved_loc, ach_poss)) + # Ignore object aux dimensions for now + # aux_achieved = np.concatenate((aux_achieved, aux_achs)) + return achieved_loc, aux_achieved + + def _get_desired_goal(self): + return self._get_obj_loc_pose(self._goal_names)[0] + + def generate_goal_conditioned_obs(self, agent): + ach, aux_ach = self._get_achieved() + obs = OrderedDict() + obs['observation'] = self._get_obj_loc_pose(self._distraction_list)[0] + obs['achieved_goal'] = ach + obs['desired_goal'] = self._get_desired_goal() + obs['aux_achieved'] = aux_ach + return obs + + def _compute_reward(self): + def l2_dist_close_reward_fn(achieved_goal, goal): + return 0 if np.linalg.norm( + achieved_goal - goal) < self._success_distance_thresh else -1 + + return l2_dist_close_reward_fn(self._get_achieved()[0], + self._get_desired_goal()) + + def _produce_rewards(self, prev_min_dist_to_distraction): + ap, ad = self._get_agent_loc() + distraction_penalty, prev_min_dist_to_distraction = ( + self._get_distraction_penalty(ap, 1., + prev_min_dist_to_distraction)) + reward = self._compute_reward() + done = reward >= 0 + if done: + distraction_penalty = 0 + logging.debug("yielding reward: " + str(reward)) + logging.debug("at location: %s, aux: %s", ap.astype('|S5'), + np.array(ad).astype('|S5')) + reward += distraction_penalty + rewards = np.array([reward, -distraction_penalty]) + return self._prepare_teacher_action( + reward=reward, + sentence="well done", + done=done, + success=done, + rewards=rewards), prev_min_dist_to_distraction + + def _run_one_goal(self, goal=None, move_distractions=None): + # The two params are not used here. + ap, ad = self._get_agent_loc() + obj_positions = self._move_objs(self._obj_names, ap, ad) + avoids = self._move_goals(ap, ad, obj_positions) + avoids = self._move_distractions(ap, ad, avoids) + avoids.clear() + steps_since_last_reward = 0 + prev_min_dist_to_distraction = 100 + rewards = None # reward array in multi_dim_reward case + while steps_since_last_reward < self._max_steps: + steps_since_last_reward += 1 + (agent_sentence, prev_min_dist_to_distraction + ) = self._produce_rewards(prev_min_dist_to_distraction) + yield agent_sentence + (agent_sentence, prev_min_dist_to_distraction + ) = self._produce_rewards(prev_min_dist_to_distraction) + agent_sentence.done = True + yield agent_sentence + + @gin.configurable class ICubAuxiliaryTask(Task): """ From 25b0f893fcbb3e6ee922353508ce4c0ecd695555 Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Wed, 26 May 2021 14:18:19 -0700 Subject: [PATCH 35/53] fix goal dist output bug --- python/social_bot/tasks.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/python/social_bot/tasks.py b/python/social_bot/tasks.py index d0dfb48d..8c48a12b 100644 --- a/python/social_bot/tasks.py +++ b/python/social_bot/tasks.py @@ -358,7 +358,7 @@ def __init__(self, min_distance = self._success_distance_thresh self._min_distance = min_distance self._goal_dist = 0. - obs_format = "image" + obs_format = "image or full_state" obs_relative = "ego" if use_egocentric_states: obs_format = "full_state" @@ -424,7 +424,9 @@ def _prepare_teacher_action(self, goal_dist = 0. if rewards is not None: rewards = rewards.astype(np.float32) - if self._goal_dist > 0: + # only output when episode done, otherwise, reward accumulator in tensorboard + # may miss it. + if done: goal_dist = self._goal_dist # clear self._goal_dist so it is only output once self._goal_dist = 0. @@ -730,8 +732,6 @@ def _move_obj(self, loc = np.asarray((random.random() * range - range / 2, random.random() * range - range / 2, 0)) - if is_goal: - self._initial_dist = np.linalg.norm(loc - agent_loc) satisfied = True if (abs(loc[0]) > self._max_play_ground_size or abs(loc[1]) > self._max_play_ground_size): # not within walls @@ -758,6 +758,8 @@ def _move_obj(self, ": {}.".format(name, str(agent_loc), str(range), str(_min_distance), str(self._max_play_ground_size))) + if is_goal: + self._initial_dist = np.linalg.norm(loc - agent_loc) break if is_goal: self._prev_dist = self._initial_dist @@ -964,8 +966,9 @@ def _get_obj_loc_pose(self, obj_names): for obj_name in obj_names: obj = self._world.get_agent(obj_name) obj_pos, obj_dir = obj.get_pose() + # vel = np.array(obj.get_velocities()).flatten() res_pos = np.concatenate((res_pos, obj_pos[0:2])) - res_aux = np.concatenate((res_aux, obj_pos[2:], obj_dir)) + res_aux = np.concatenate((res_aux, obj_pos[2:], obj_dir)) # , vel return res_pos, res_aux def _get_achieved(self): From 30864bb194d7e63376e2842004c23fb9e5e9b50f Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Tue, 8 Jun 2021 11:30:42 -0700 Subject: [PATCH 36/53] allow obj to be initialized closer to the agent --- python/social_bot/tasks.py | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/python/social_bot/tasks.py b/python/social_bot/tasks.py index 61cba68d..620674c5 100644 --- a/python/social_bot/tasks.py +++ b/python/social_bot/tasks.py @@ -702,6 +702,7 @@ def _move_obj(self, agent_dir, is_goal=True, avoid_locations=[], + close_to_agent=False, name="Unspecified"): if (self.should_use_curriculum_training() and self._percent_full_range_in_curriculum > 0 @@ -714,10 +715,13 @@ def _move_obj(self, attempts = 0 dist = range _min_distance = self._min_distance + _succ_thd = self._success_distance_thresh + if close_to_agent: + _min_distance = 0.1 + _succ_thd = 0.1 while True: attempts += 1 - dist = random.random() * (range - self._success_distance_thresh - ) + self._success_distance_thresh + dist = random.random() * (range - _succ_thd) + _succ_thd if self._curriculum_target_angle: angle_range = self._random_angle else: @@ -899,7 +903,8 @@ def __init__(self, push_only=True, obj_names=['wood_cube_30cm_without_offset'], goal_names=['goal_indicator'], - distraction_list=['car_wheel']): + distraction_list=['car_wheel'], + close_to_agent=False): """A Push or Push and Reach task. We utilize some of the curriculum, distraction obj handling logic in GoalTask. @@ -910,10 +915,12 @@ def __init__(self, obj_names (list of string): when not empty, it's the names of the objects to be moved. goal_names (list of string): when not empty, these goal objects indicate the goal locations for the objects in obj_names. + close_to_agent (bool): whether to initialize object to be pushed closer to the agent. """ self._push_only = push_only self._obj_names = obj_names self._goal_names = goal_names + self._close_to_agent = close_to_agent if push_only: assert len(obj_names) == len(goal_names) else: @@ -932,7 +939,13 @@ def __init__(self, def _random_move_objects(self, random_range=10.0): pass - def _move_objs(self, obj_names, ap, ad, avoids=[], record_goal_dist=False): + def _move_objs(self, + obj_names, + ap, + ad, + avoids=[], + record_goal_dist=False, + close_to_agent=False): """Move objects according to agent location.""" obj_positions = avoids if record_goal_dist: @@ -941,7 +954,12 @@ def _move_objs(self, obj_names, ap, ad, avoids=[], record_goal_dist=False): for obj_name in obj_names: obj = self._world.get_agent(obj_name) p, dist = self._move_obj( - obj, ap, ad, avoid_locations=obj_positions, name=obj_name) + obj, + ap, + ad, + avoid_locations=obj_positions, + name=obj_name, + close_to_agent=close_to_agent) if record_goal_dist: self._goal_dist += dist n += 1 @@ -1030,7 +1048,8 @@ def _produce_rewards(self, prev_min_dist_to_distraction): def _run_one_goal(self, goal=None, move_distractions=None): # The two params are not used here. ap, ad = self._get_agent_loc() - obj_positions = self._move_objs(self._obj_names, ap, ad) + obj_positions = self._move_objs( + self._obj_names, ap, ad, close_to_agent=self._close_to_agent) avoids = self._move_goals(ap, ad, obj_positions) avoids = self._move_distractions(ap, ad, avoids) avoids.clear() From 4fee570612511aca18ed623f393ac50a268d8a16 Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Tue, 8 Jun 2021 14:55:25 -0700 Subject: [PATCH 37/53] update some doc --- README.md | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 4f6b3f64..17a3fa88 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,11 @@ You also need to install the following packages: apt install python3-tk ``` +## Download +```bash +git clone --recursive https://github.com/HorizonRobotics/SocialRobot +``` + ## To compile ```bash cd REPO_ROOT @@ -27,7 +32,7 @@ mkdir build cd build cmake .. make -j -cd REPO_ROOT +cd .. # REPO_ROOT pip3 install -e . ``` if cmake .. complains about cannot find configuration file provided by "gazebo", run @@ -165,6 +170,22 @@ It's not so easy, for some task you may need some trials to complete it. An exam ### python You need to make sure the python you use matches the python found by cmake. You can check this by looking at REPO_ROOT/build/CMakeCache.txt +### Python config cmake errors +``` +CMake Error at pybind11/tools/FindPythonLibsNew.cmake:95 (message): + Python config failure: + +Call Stack (most recent call first): + pybind11/tools/pybind11Tools.cmake:16 (find_package) + pybind11/CMakeLists.txt:33 (include) +``` + +Remove the makefiles and build again: +```bash +cd REPO_ROOT +rm -rf build +``` + ### display You need to make sure your `DISPLAY` environment variable points to a valid display, otherwise camera sensor cannot be created and there will be the following error: From ed5f7b6da25c67ce9c0700c6a78a891d93e0fe50 Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Tue, 15 Jun 2021 11:26:53 -0700 Subject: [PATCH 38/53] allow using obj pose, move obj at least 0.3 away from agent. --- python/social_bot/tasks.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/python/social_bot/tasks.py b/python/social_bot/tasks.py index 620674c5..bdfdde9b 100644 --- a/python/social_bot/tasks.py +++ b/python/social_bot/tasks.py @@ -717,8 +717,8 @@ def _move_obj(self, _min_distance = self._min_distance _succ_thd = self._success_distance_thresh if close_to_agent: - _min_distance = 0.1 - _succ_thd = 0.1 + _min_distance = 0.3 + _succ_thd = _min_distance while True: attempts += 1 dist = random.random() * (range - _succ_thd) + _succ_thd @@ -904,7 +904,8 @@ def __init__(self, obj_names=['wood_cube_30cm_without_offset'], goal_names=['goal_indicator'], distraction_list=['car_wheel'], - close_to_agent=False): + close_to_agent=False, + use_obj_pose=False): """A Push or Push and Reach task. We utilize some of the curriculum, distraction obj handling logic in GoalTask. @@ -916,11 +917,13 @@ def __init__(self, goal_names (list of string): when not empty, these goal objects indicate the goal locations for the objects in obj_names. close_to_agent (bool): whether to initialize object to be pushed closer to the agent. + use_obj_pose (bool): include object auxiliary dimensions as input. """ self._push_only = push_only self._obj_names = obj_names self._goal_names = goal_names self._close_to_agent = close_to_agent + self._use_obj_pose = use_obj_pose if push_only: assert len(obj_names) == len(goal_names) else: @@ -1000,8 +1003,8 @@ def _get_achieved(self): aux_achieved = np.concatenate((aux_achieved, ap[2:], ad)) ach_poss, aux_achs = self._get_obj_loc_pose(self._obj_names) achieved_loc = np.concatenate((achieved_loc, ach_poss)) - # Ignore object aux dimensions for now - # aux_achieved = np.concatenate((aux_achieved, aux_achs)) + if self._use_obj_pose: + aux_achieved = np.concatenate((aux_achieved, aux_achs)) return achieved_loc, aux_achieved def _get_desired_goal(self): From d68a3ff01409807d60bd346b0322e401e2267e96 Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Fri, 18 Jun 2021 22:07:35 -0700 Subject: [PATCH 39/53] allow moving target relative to object position, vs agent --- python/social_bot/tasks.py | 28 +++++++++++++++++++++++----- 1 file changed, 23 insertions(+), 5 deletions(-) diff --git a/python/social_bot/tasks.py b/python/social_bot/tasks.py index bdfdde9b..6cec25fa 100644 --- a/python/social_bot/tasks.py +++ b/python/social_bot/tasks.py @@ -905,6 +905,7 @@ def __init__(self, goal_names=['goal_indicator'], distraction_list=['car_wheel'], close_to_agent=False, + target_relative_to_obj=False, use_obj_pose=False): """A Push or Push and Reach task. @@ -917,11 +918,13 @@ def __init__(self, goal_names (list of string): when not empty, these goal objects indicate the goal locations for the objects in obj_names. close_to_agent (bool): whether to initialize object to be pushed closer to the agent. + target_relative_to_obj (bool): initialize target position relative to object vs agent location. use_obj_pose (bool): include object auxiliary dimensions as input. """ self._push_only = push_only self._obj_names = obj_names self._goal_names = goal_names + self._target_relative_to_obj = target_relative_to_obj self._close_to_agent = close_to_agent self._use_obj_pose = use_obj_pose if push_only: @@ -948,24 +951,34 @@ def _move_objs(self, ad, avoids=[], record_goal_dist=False, - close_to_agent=False): + close_to_agent=False, + target_relative_to_obj=False): """Move objects according to agent location.""" obj_positions = avoids + if target_relative_to_obj: + assert len(avoids) == len(obj_names), "num objects != num targets" + orig_obj_pos = avoids.copy() + if record_goal_dist: self._goal_dist = 0 - n = 0 + n = 0 for obj_name in obj_names: obj = self._world.get_agent(obj_name) + start_pos = ap + _avoids = obj_positions + if target_relative_to_obj: + start_pos = orig_obj_pos[n] + _avoids = _avoids + [ap] p, dist = self._move_obj( obj, - ap, + start_pos, ad, avoid_locations=obj_positions, name=obj_name, close_to_agent=close_to_agent) if record_goal_dist: self._goal_dist += dist - n += 1 + n += 1 obj_positions.append(p) if record_goal_dist: self._goal_dist /= n @@ -976,7 +989,12 @@ def _move_goals(self, ap, ad, obj_locs=[]): # TODO(lezhao): copy obj_locs into avoids and put ap in avoids, # then find goal location based on each obj location. return self._move_objs( - self._goal_names, ap, ad, obj_locs, record_goal_dist=True) + self._goal_names, + ap, + ad, + obj_locs, + record_goal_dist=True, + target_relative_to_obj=self._target_relative_to_obj) def _move_distractions(self, ap, ad, avoids=[]): return self._move_objs(self._distraction_list, ap, ad, avoids) From 59086484846194fd3143bade74bfddd146f23546 Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Thu, 12 Aug 2021 11:42:26 -0700 Subject: [PATCH 40/53] allow turn off multi dim reward in push task; more debug printing. --- python/social_bot/tasks.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/python/social_bot/tasks.py b/python/social_bot/tasks.py index 6cec25fa..1ab2b0d9 100644 --- a/python/social_bot/tasks.py +++ b/python/social_bot/tasks.py @@ -904,6 +904,7 @@ def __init__(self, obj_names=['wood_cube_30cm_without_offset'], goal_names=['goal_indicator'], distraction_list=['car_wheel'], + multi_dim_reward=True, close_to_agent=False, target_relative_to_obj=False, use_obj_pose=False): @@ -924,6 +925,7 @@ def __init__(self, self._push_only = push_only self._obj_names = obj_names self._goal_names = goal_names + self._multi_dim_reward = multi_dim_reward self._target_relative_to_obj = target_relative_to_obj self._close_to_agent = close_to_agent self._use_obj_pose = use_obj_pose @@ -936,7 +938,7 @@ def __init__(self, max_steps, goal_conditioned=True, use_aux_achieved=True, - multi_dim_reward=True, + multi_dim_reward=multi_dim_reward, end_episode_after_success=True, goal_name=goal_names[0], distraction_list=distraction_list, @@ -1054,11 +1056,15 @@ def _produce_rewards(self, prev_min_dist_to_distraction): done = reward >= 0 if done: distraction_penalty = 0 + av = np.array(self._agent.get_velocities()).flatten() logging.debug("yielding reward: " + str(reward)) - logging.debug("at location: %s, aux: %s", ap.astype('|S5'), - np.array(ad).astype('|S5')) + logging.debug("at location: %s, aux: %s, vel: %s", + ap.astype('|S5'), + np.array(ad).astype('|S5'), av.astype('|S5')) reward += distraction_penalty - rewards = np.array([reward, -distraction_penalty]) + rewards = None + if self._multi_dim_reward: + rewards = np.array([reward, -distraction_penalty]) return self._prepare_teacher_action( reward=reward, sentence="well done", @@ -1076,7 +1082,6 @@ def _run_one_goal(self, goal=None, move_distractions=None): avoids.clear() steps_since_last_reward = 0 prev_min_dist_to_distraction = 100 - rewards = None # reward array in multi_dim_reward case while steps_since_last_reward < self._max_steps: steps_since_last_reward += 1 (agent_sentence, prev_min_dist_to_distraction From 8bd875373b43dd02d30208176fa097b05bde2ee3 Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Mon, 16 Aug 2021 15:01:02 -0700 Subject: [PATCH 41/53] proper seeding of envs for deterministic randomization --- pygazebo/pygazebo.cc | 6 ++++++ python/social_bot/envs/gazebo_base.py | 15 +++++++++++++-- 2 files changed, 19 insertions(+), 2 deletions(-) diff --git a/pygazebo/pygazebo.cc b/pygazebo/pygazebo.cc index cb94ce7c..faccf1e4 100644 --- a/pygazebo/pygazebo.cc +++ b/pygazebo/pygazebo.cc @@ -22,6 +22,7 @@ #include #include #include +#include #include // NOLINT #include @@ -469,6 +470,7 @@ class World { void Initialize(const std::vector& args, int port = 0, + int seed = -1, bool quiet = false) { if (port != 0) { std::string uri = "localhost:" + std::to_string(port); @@ -477,6 +479,9 @@ void Initialize(const std::vector& args, if (!gazebo_initialized) { gazebo::common::Console::SetQuiet(quiet); gazebo::setupServer(args); + if (seed >= 0) { + ignition::math::Rand::Seed((unsigned int)seed); + } // gazebo::runWorld uses World::RunLoop(). RunLoop() starts LogWorker() // every time. LogWorker will always store something in the buffer when // it is started. But we don't have a running LogRecord to collect all @@ -568,6 +573,7 @@ PYBIND11_MODULE(pygazebo, m) { "Initialize", py::arg("args") = std::vector(), py::arg("port") = 0, + py::arg("seed") = -1, py::arg("quiet") = false); m.def("close", &Close, "Shutdown everything of gazebo"); diff --git a/python/social_bot/envs/gazebo_base.py b/python/social_bot/envs/gazebo_base.py index 4642a30d..f284b85f 100644 --- a/python/social_bot/envs/gazebo_base.py +++ b/python/social_bot/envs/gazebo_base.py @@ -43,6 +43,8 @@ def __init__(self, world_config=None, sim_time_precision=0.001, port=None, + seed=-1, + use_port_in_gz_seed=True, quiet=False): """ Args: @@ -52,6 +54,10 @@ def __init__(self, see `_modify_world_xml` for details sim_time_precision (float): the time precision of the simulator port (int): Gazebo port + seed (int): random seed for gazebo, -1 for using the default: PID + use_port_in_gz_seed (bool): if True, use port and seed in gazebo seed. + Set to False when doing repeatability debugging. Set to True when + running several envs in parallel during regular training. quiet (bool) Set quiet output """ os.environ["GAZEBO_MODEL_DATABASE_URI"] = "" @@ -61,12 +67,17 @@ def __init__(self, # This avoids different parallel simulations having the same randomness. # When calling from alf, alf.environments.utils.create_environment calls # env.seed() afterwards, which is the real seed being used. Not this one. - random.seed(port) + _seed = port + if seed >= 0: + _seed = seed + if use_port_in_gz_seed: + _seed += port + random.seed(_seed) self._rendering_process = None self._rendering_camera = None # the default camera pose for rendering rgb_array, could be override self._rendering_cam_pose = "10 -10 6 0 0.4 2.4" - gazebo.initialize(port=port, quiet=quiet) + gazebo.initialize(port=port, seed=_seed, quiet=quiet) if world_file: world_file_abs_path = os.path.join(social_bot.get_world_dir(), From 8a15ea9e0aa8815712355cc06dad94dcf7f4eaeb Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Tue, 21 Sep 2021 09:29:04 -0700 Subject: [PATCH 42/53] mode with Push and Reach goals in a single PushReachTask. --- python/social_bot/tasks.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/python/social_bot/tasks.py b/python/social_bot/tasks.py index 1ab2b0d9..d654e15d 100644 --- a/python/social_bot/tasks.py +++ b/python/social_bot/tasks.py @@ -900,7 +900,7 @@ class PushReachTask(GoalTask): def __init__(self, env, max_steps, - push_only=True, + push_reach=False, obj_names=['wood_cube_30cm_without_offset'], goal_names=['goal_indicator'], distraction_list=['car_wheel'], @@ -913,7 +913,7 @@ def __init__(self, We utilize some of the curriculum, distraction obj handling logic in GoalTask. Args: - push_only (bool): if True, goal positions are for objects to achieve; otherwise, + push_reach (bool): if False, push only, goal positions are for objects to achieve; otherwise, the first goal position is for the agent to achieve. obj_names (list of string): when not empty, it's the names of the objects to be moved. goal_names (list of string): when not empty, these goal objects indicate the goal locations for @@ -922,14 +922,16 @@ def __init__(self, target_relative_to_obj (bool): initialize target position relative to object vs agent location. use_obj_pose (bool): include object auxiliary dimensions as input. """ - self._push_only = push_only + self._push_reach = push_reach + if push_reach: + goal_names.append('target_ball') self._obj_names = obj_names self._goal_names = goal_names self._multi_dim_reward = multi_dim_reward self._target_relative_to_obj = target_relative_to_obj self._close_to_agent = close_to_agent self._use_obj_pose = use_obj_pose - if push_only: + if not push_reach: assert len(obj_names) == len(goal_names) else: assert len(obj_names) == len(goal_names) - 1 @@ -1015,8 +1017,10 @@ def _get_obj_loc_pose(self, obj_names): def _get_achieved(self): achieved_loc = np.array([]) aux_achieved = np.array(self._agent.get_velocities()).flatten() + # print("gazebo port: ", self._env._port) + # print("gazebo aux_ach: ", aux_achieved, flush=True) ap, ad = self._get_agent_loc() - if self._push_only: + if not self._push_reach: aux_achieved = np.concatenate((aux_achieved, ap, ad)) else: achieved_loc = np.concatenate((achieved_loc, ap[0:2])) From b90e6e5ff0c1cb1754b903e6b6adadce8c46a409 Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Thu, 28 Apr 2022 13:37:25 -0700 Subject: [PATCH 43/53] update Pioneer tasks (target navigation, push reach) to alf config --- python/social_bot/envs/play_ground.py | 5 +++-- python/social_bot/gazebo_agent.py | 7 ++++--- python/social_bot/tasks.py | 17 +++++++++-------- 3 files changed, 16 insertions(+), 13 deletions(-) diff --git a/python/social_bot/envs/play_ground.py b/python/social_bot/envs/play_ground.py index 2f63bd4b..615d683e 100644 --- a/python/social_bot/envs/play_ground.py +++ b/python/social_bot/envs/play_ground.py @@ -22,7 +22,6 @@ import random import json import PIL -import gin import gym from gym import spaces from absl import logging @@ -38,8 +37,10 @@ from social_bot.teacher import TeacherAction from social_bot.tasks import GoalTask, PushReachTask, ICubAuxiliaryTask, KickingBallTask, Reaching3D +import alf -@gin.configurable + +@alf.configurable class PlayGround(GazeboEnvBase): """ This envionment support agent type of pr2_noplugin, pioneer2dx_noplugin, diff --git a/python/social_bot/gazebo_agent.py b/python/social_bot/gazebo_agent.py index 88cb8eca..27d02b44 100644 --- a/python/social_bot/gazebo_agent.py +++ b/python/social_bot/gazebo_agent.py @@ -17,7 +17,6 @@ import time import random import json -import gin import numpy as np import PIL.Image from collections import OrderedDict @@ -27,8 +26,10 @@ import social_bot.pygazebo as gazebo from social_bot.tasks import GoalTask +import alf -@gin.configurable + +@alf.configurable class GazeboAgent(): """ Class for the agent of gazebo-based SocialRobot enviroments """ @@ -429,7 +430,7 @@ def wrap_actions(self, action): raise NotImplementedError("wrap_actions not implemented!") -@gin.configurable +@alf.configurable class YoubotActionWrapper(ActionWrapper): """ This action wrapper transform a new actions to primitive actions. diff --git a/python/social_bot/tasks.py b/python/social_bot/tasks.py index d654e15d..c05da72c 100644 --- a/python/social_bot/tasks.py +++ b/python/social_bot/tasks.py @@ -19,7 +19,6 @@ import numpy as np import operator import os -import gin import itertools import random import json @@ -29,6 +28,8 @@ import social_bot from social_bot.teacher import TeacherAction +import alf + class Task(object): """Base class for Task. @@ -161,7 +162,7 @@ def _random_move_object(self, return np.array(loc) -@gin.configurable +@alf.configurable class GoalTask(Task): """ A simple teacher task to find a goal. @@ -895,7 +896,7 @@ def task_specific_observation(self, agent): return obs -@gin.configurable +@alf.configurable class PushReachTask(GoalTask): def __init__(self, env, @@ -1097,7 +1098,7 @@ def _run_one_goal(self, goal=None, move_distractions=None): yield agent_sentence -@gin.configurable +@alf.configurable class ICubAuxiliaryTask(Task): """ An auxiliary task spicified for iCub, to keep the agent from falling down @@ -1272,7 +1273,7 @@ def task_specific_observation(self, agent): return icub_extra_obs -@gin.configurable +@alf.configurable class KickingBallTask(Task): """ A simple task to kick a ball so that it rolls into the goal. An @@ -1419,7 +1420,7 @@ def _move_ball(self, ball): ball.set_pose((loc, (0, 0, 0))) -@gin.configurable +@alf.configurable class Reaching3D(Task): """ A task to reach a random 3D position with the end effector of a robot arm. @@ -1504,7 +1505,7 @@ def task_specific_observation(self, agent): return obs -@gin.configurable +@alf.configurable class PickAndPlace(Task): """ A task to grip an object (a wood cube), move and then place it to the target position. @@ -1653,7 +1654,7 @@ def task_specific_observation(self, agent): (obs, finger_contacts, agent_pose, joints_states), axis=0) -@gin.configurable +@alf.configurable class Stack(Task): """ A task to stack several wood cubes. The agent need to grasp the cube and From 9185962af6112e9a966933c5bcb49c2ab60c81c7 Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Thu, 28 Apr 2022 13:43:32 -0700 Subject: [PATCH 44/53] depend on alf for alf.config --- setup.py | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 99957feb..99c47fa8 100644 --- a/setup.py +++ b/setup.py @@ -13,9 +13,17 @@ version='0.0.1', install_requires=[ 'gin-config@git+https://github.com/HorizonRobotics/gin-config.git', - 'gym == 0.12.5', 'numpy >= 1.13.3', 'matplotlib', 'psutil', - 'lxml >= 3.5.0', 'Pillow', 'absl-py', 'pytest', 'PyUserInput', - 'python-xlib' + 'gym == 0.12.5', + 'numpy >= 1.13.3', + 'matplotlib', + 'psutil', + 'lxml >= 3.5.0', + 'Pillow', + 'absl-py', + 'pytest', + 'PyUserInput', + 'python-xlib', + 'alf@git+https://github.com/HorizonRobotics/alf.git', ], package_dir={'': 'python'}, packages=find_packages('python'), From cc5c764ede2ef8769506a50105ef01c46875b67b Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Thu, 28 Apr 2022 15:02:43 -0700 Subject: [PATCH 45/53] update alf image --- .github/workflows/python_test.yml | 2 +- setup.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/.github/workflows/python_test.yml b/.github/workflows/python_test.yml index 237bfcca..0390b91f 100644 --- a/.github/workflows/python_test.yml +++ b/.github/workflows/python_test.yml @@ -7,7 +7,7 @@ jobs: runs-on: ubuntu-18.04 container: - image: horizonrobotics/alf:0.0.3-pytorch + image: horizonrobotics/alf:0.0.6-pytorch1.8-python3.7 steps: - uses: actions/checkout@v1 - name: Install dependencies diff --git a/setup.py b/setup.py index 99c47fa8..19872a2a 100644 --- a/setup.py +++ b/setup.py @@ -23,7 +23,6 @@ 'pytest', 'PyUserInput', 'python-xlib', - 'alf@git+https://github.com/HorizonRobotics/alf.git', ], package_dir={'': 'python'}, packages=find_packages('python'), From 7460efd7d10feeab0ae744a3d1fe0312506585b9 Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Fri, 29 Apr 2022 11:30:19 -0700 Subject: [PATCH 46/53] Revert "update alf image" This reverts commit cc5c764ede2ef8769506a50105ef01c46875b67b. --- .github/workflows/python_test.yml | 2 +- setup.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/python_test.yml b/.github/workflows/python_test.yml index 0390b91f..237bfcca 100644 --- a/.github/workflows/python_test.yml +++ b/.github/workflows/python_test.yml @@ -7,7 +7,7 @@ jobs: runs-on: ubuntu-18.04 container: - image: horizonrobotics/alf:0.0.6-pytorch1.8-python3.7 + image: horizonrobotics/alf:0.0.3-pytorch steps: - uses: actions/checkout@v1 - name: Install dependencies diff --git a/setup.py b/setup.py index 19872a2a..99c47fa8 100644 --- a/setup.py +++ b/setup.py @@ -23,6 +23,7 @@ 'pytest', 'PyUserInput', 'python-xlib', + 'alf@git+https://github.com/HorizonRobotics/alf.git', ], package_dir={'': 'python'}, packages=find_packages('python'), From 25cc082e29fb89a1b2942e422c233db3f52535f7 Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Fri, 29 Apr 2022 11:30:23 -0700 Subject: [PATCH 47/53] Revert "depend on alf for alf.config" This reverts commit 9185962af6112e9a966933c5bcb49c2ab60c81c7. --- setup.py | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/setup.py b/setup.py index 99c47fa8..99957feb 100644 --- a/setup.py +++ b/setup.py @@ -13,17 +13,9 @@ version='0.0.1', install_requires=[ 'gin-config@git+https://github.com/HorizonRobotics/gin-config.git', - 'gym == 0.12.5', - 'numpy >= 1.13.3', - 'matplotlib', - 'psutil', - 'lxml >= 3.5.0', - 'Pillow', - 'absl-py', - 'pytest', - 'PyUserInput', - 'python-xlib', - 'alf@git+https://github.com/HorizonRobotics/alf.git', + 'gym == 0.12.5', 'numpy >= 1.13.3', 'matplotlib', 'psutil', + 'lxml >= 3.5.0', 'Pillow', 'absl-py', 'pytest', 'PyUserInput', + 'python-xlib' ], package_dir={'': 'python'}, packages=find_packages('python'), From 484b0f9c5edd194825d2d773ff827693b64bf3df Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Fri, 29 Apr 2022 11:30:25 -0700 Subject: [PATCH 48/53] Revert "update Pioneer tasks (target navigation, push reach) to alf config" This reverts commit b90e6e5ff0c1cb1754b903e6b6adadce8c46a409. --- python/social_bot/envs/play_ground.py | 5 ++--- python/social_bot/gazebo_agent.py | 7 +++---- python/social_bot/tasks.py | 17 ++++++++--------- 3 files changed, 13 insertions(+), 16 deletions(-) diff --git a/python/social_bot/envs/play_ground.py b/python/social_bot/envs/play_ground.py index 615d683e..2f63bd4b 100644 --- a/python/social_bot/envs/play_ground.py +++ b/python/social_bot/envs/play_ground.py @@ -22,6 +22,7 @@ import random import json import PIL +import gin import gym from gym import spaces from absl import logging @@ -37,10 +38,8 @@ from social_bot.teacher import TeacherAction from social_bot.tasks import GoalTask, PushReachTask, ICubAuxiliaryTask, KickingBallTask, Reaching3D -import alf - -@alf.configurable +@gin.configurable class PlayGround(GazeboEnvBase): """ This envionment support agent type of pr2_noplugin, pioneer2dx_noplugin, diff --git a/python/social_bot/gazebo_agent.py b/python/social_bot/gazebo_agent.py index 27d02b44..88cb8eca 100644 --- a/python/social_bot/gazebo_agent.py +++ b/python/social_bot/gazebo_agent.py @@ -17,6 +17,7 @@ import time import random import json +import gin import numpy as np import PIL.Image from collections import OrderedDict @@ -26,10 +27,8 @@ import social_bot.pygazebo as gazebo from social_bot.tasks import GoalTask -import alf - -@alf.configurable +@gin.configurable class GazeboAgent(): """ Class for the agent of gazebo-based SocialRobot enviroments """ @@ -430,7 +429,7 @@ def wrap_actions(self, action): raise NotImplementedError("wrap_actions not implemented!") -@alf.configurable +@gin.configurable class YoubotActionWrapper(ActionWrapper): """ This action wrapper transform a new actions to primitive actions. diff --git a/python/social_bot/tasks.py b/python/social_bot/tasks.py index c05da72c..d654e15d 100644 --- a/python/social_bot/tasks.py +++ b/python/social_bot/tasks.py @@ -19,6 +19,7 @@ import numpy as np import operator import os +import gin import itertools import random import json @@ -28,8 +29,6 @@ import social_bot from social_bot.teacher import TeacherAction -import alf - class Task(object): """Base class for Task. @@ -162,7 +161,7 @@ def _random_move_object(self, return np.array(loc) -@alf.configurable +@gin.configurable class GoalTask(Task): """ A simple teacher task to find a goal. @@ -896,7 +895,7 @@ def task_specific_observation(self, agent): return obs -@alf.configurable +@gin.configurable class PushReachTask(GoalTask): def __init__(self, env, @@ -1098,7 +1097,7 @@ def _run_one_goal(self, goal=None, move_distractions=None): yield agent_sentence -@alf.configurable +@gin.configurable class ICubAuxiliaryTask(Task): """ An auxiliary task spicified for iCub, to keep the agent from falling down @@ -1273,7 +1272,7 @@ def task_specific_observation(self, agent): return icub_extra_obs -@alf.configurable +@gin.configurable class KickingBallTask(Task): """ A simple task to kick a ball so that it rolls into the goal. An @@ -1420,7 +1419,7 @@ def _move_ball(self, ball): ball.set_pose((loc, (0, 0, 0))) -@alf.configurable +@gin.configurable class Reaching3D(Task): """ A task to reach a random 3D position with the end effector of a robot arm. @@ -1505,7 +1504,7 @@ def task_specific_observation(self, agent): return obs -@alf.configurable +@gin.configurable class PickAndPlace(Task): """ A task to grip an object (a wood cube), move and then place it to the target position. @@ -1654,7 +1653,7 @@ def task_specific_observation(self, agent): (obs, finger_contacts, agent_pose, joints_states), axis=0) -@alf.configurable +@gin.configurable class Stack(Task): """ A task to stack several wood cubes. The agent need to grasp the cube and From 611dbd39727e117a0b0de20984e4a10834025c8f Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Wed, 5 Oct 2022 14:45:55 -0700 Subject: [PATCH 49/53] pose only goal for goal task --- python/social_bot/tasks.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/python/social_bot/tasks.py b/python/social_bot/tasks.py index d654e15d..35c9b4ee 100644 --- a/python/social_bot/tasks.py +++ b/python/social_bot/tasks.py @@ -180,6 +180,7 @@ def __init__(self, goal_conditioned=False, speed_goal=False, speed_goal_limit=1.6, + pose_goal=False, use_aux_achieved=False, xy_only_aux=False, multi_dim_reward=False, @@ -222,6 +223,8 @@ def __init__(self, goal_conditioned (bool): if True, each step has -1 reward, unless at goal state, which gives 0. speed_goal (bool): if True, use speed pose etc. together with position as part of goal. speed_goal_limit (float): randomly sample speed goal in the range: -limit to +limit. + pose_goal (bool): When speed_goal is True, if pose_goal is True, speed and everything else is put + into observations, instead of aux_achieved, so only pose is in aux_achieved. use_aux_achieved (bool): if True, pull out speed, pose dimensions into a separate field: aux_achieved. Only valid when goal_conditioned is True. xy_only_aux (bool): exclude irrelevant dimensions (z-axis movements) from @@ -289,6 +292,7 @@ def __init__(self, self._goal_conditioned = goal_conditioned self._speed_goal = speed_goal self._speed_goal_limit = speed_goal_limit + self._pose_goal = pose_goal self._use_aux_achieved = use_aux_achieved self._xy_only_aux = xy_only_aux self._multi_dim_reward = multi_dim_reward @@ -458,6 +462,8 @@ def _get_goal_dist(self, goal): if self._speed_goal: _goal_loc = np.concatenate((_goal_loc, self._aux_desired), axis=0) aux = self._get_agent_aux_dims() + if self._pose_goal: + aux = aux[-1:] # yaw _loc = np.concatenate((_loc, aux), axis=0) dist = np.linalg.norm(_loc - _goal_loc) @@ -677,6 +683,8 @@ def _move_goal(self, goal, agent_loc, agent_dir, move_distractions=True): # 0, 1, 2: vel; 3, 4, 5: angular vel; 6: z position; 7, 8, 9: roll pitch yaw self._aux_desired = np.array( [xspeed, yspeed, 0, 0, 0, yawspeed, 0, 0, 0, yaw]) + if self._pose_goal: + self._aux_desired = np.array([yaw]) self._goal_dist += dist avoid_locations.append(loc) distractions = OrderedDict() @@ -812,6 +820,10 @@ def generate_goal_conditioned_obs(self, agent): aux = self._get_agent_aux_dims(agent_pose, agent_vel) if self._speed_goal: obs['observation'] = flat_obs[14:] + if self._pose_goal: + obs['observation'] = np.concatenate( + (obs['observation'], aux[:-1]), axis=0) + aux = aux[-1:] # yaw obs['achieved_goal'] = np.concatenate((obs['achieved_goal'], aux), axis=0) obs['desired_goal'] = np.concatenate( From 96ea226dec05f591babd6b4593a8a58ec1074496 Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Thu, 6 Oct 2022 15:57:42 -0700 Subject: [PATCH 50/53] adding ghost pioneer car to indicate goal position and orientation in GoalTask --- .../pioneer2dx_noplugin_ghost/model-ghost.sdf | 102 ++++++++++++++++++ .../pioneer2dx_noplugin_ghost/model.config | 21 ++++ python/social_bot/tasks.py | 21 ++-- 3 files changed, 135 insertions(+), 9 deletions(-) create mode 100644 python/social_bot/models/pioneer2dx_noplugin_ghost/model-ghost.sdf create mode 100644 python/social_bot/models/pioneer2dx_noplugin_ghost/model.config diff --git a/python/social_bot/models/pioneer2dx_noplugin_ghost/model-ghost.sdf b/python/social_bot/models/pioneer2dx_noplugin_ghost/model-ghost.sdf new file mode 100644 index 00000000..de649101 --- /dev/null +++ b/python/social_bot/models/pioneer2dx_noplugin_ghost/model-ghost.sdf @@ -0,0 +1,102 @@ + + + + true + + 0 0 0.16 0 0 0 + + 5.67 + + 0.07 + 0.08 + 0.10 + 0 + 0 + 0 + + + + 0 0 0.04 0 0 0 + + + model://pioneer2dx/meshes/chassis.dae + + + 0.7 + + + 0.7 + -0.200 0 -0.12 0 0 0 + + + 0.04 + + + + + + + + + 0.1 -.17 0.11 0 1.5707 1.5707 + + 1.5 + + 0.0051 + 0.0051 + 0.0090 + 0 + 0 + 0 + + + + 0.7 + + + 0.11 + 0.05 + + + + + + + + + 0.1 .17 0.11 0 1.5707 1.5707 + + 1.5 + + 0.0051 + 0.0051 + 0.0090 + 0 + 0 + 0 + + + + 0.7 + + + 0.11 + 0.05 + + + + + + + + + diff --git a/python/social_bot/models/pioneer2dx_noplugin_ghost/model.config b/python/social_bot/models/pioneer2dx_noplugin_ghost/model.config new file mode 100644 index 00000000..71cee123 --- /dev/null +++ b/python/social_bot/models/pioneer2dx_noplugin_ghost/model.config @@ -0,0 +1,21 @@ + + + + + Pioneer 2DX Ghost + 1.0 + model-ghost.sdf + + + Nate Koenig + nate@osrfoundation.org + + + + A model of the pioneer 2 dx robot. This is same as https://bitbucket.org/osrf/gazebo_models/src/default/pioneer2dx/ + with the plugin removed. + + + + + diff --git a/python/social_bot/tasks.py b/python/social_bot/tasks.py index 35c9b4ee..7f3570e4 100644 --- a/python/social_bot/tasks.py +++ b/python/social_bot/tasks.py @@ -173,7 +173,7 @@ class GoalTask(Task): def __init__(self, env, max_steps, - goal_name="ball", + goal_name="pioneer2dx_noplugin_ghost", distraction_list=[ 'coke_can', 'table', 'car_wheel', 'plastic_cup', 'beer' ], @@ -666,13 +666,6 @@ def _move_goal(self, goal, agent_loc, agent_dir, move_distractions=True): if obj: distraction_loc = obj.get_pose()[0] avoid_locations.append(distraction_loc) - loc, dist = self._move_obj( - obj=goal, - agent_loc=agent_loc, - agent_dir=agent_dir, - is_goal=True, - avoid_locations=avoid_locations, - name="goal") if self._speed_goal: MAX_SPEED = self._speed_goal_limit * 2 xspeed = (0.5 - random.random()) * MAX_SPEED @@ -685,6 +678,13 @@ def _move_goal(self, goal, agent_loc, agent_dir, move_distractions=True): [xspeed, yspeed, 0, 0, 0, yawspeed, 0, 0, 0, yaw]) if self._pose_goal: self._aux_desired = np.array([yaw]) + loc, dist = self._move_obj( + obj=goal, + agent_loc=agent_loc, + agent_dir=agent_dir, + is_goal=True, + avoid_locations=avoid_locations, + name="goal") self._goal_dist += dist avoid_locations.append(loc) distractions = OrderedDict() @@ -776,7 +776,10 @@ def _move_obj(self, if is_goal: self._prev_dist = self._initial_dist obj.reset() - obj.set_pose((loc, (0, 0, 0))) + yaw = 0 + if is_goal and self._speed_goal: + yaw = self._aux_desired[-1] + obj.set_pose((loc, (0, 0, yaw))) return loc, dist def _random_move_objects(self, random_range=10.0): From 3a1700177972bb4a26743d91dba1fe1c45523ff2 Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Fri, 7 Oct 2022 15:33:22 -0700 Subject: [PATCH 51/53] allow additionally including x-y speed and yaw (together with x-y position) into goal in GoalTask. --- python/social_bot/tasks.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/python/social_bot/tasks.py b/python/social_bot/tasks.py index 7f3570e4..7d29b0c1 100644 --- a/python/social_bot/tasks.py +++ b/python/social_bot/tasks.py @@ -181,6 +181,7 @@ def __init__(self, speed_goal=False, speed_goal_limit=1.6, pose_goal=False, + yaw_and_speed=False, use_aux_achieved=False, xy_only_aux=False, multi_dim_reward=False, @@ -225,6 +226,8 @@ def __init__(self, speed_goal_limit (float): randomly sample speed goal in the range: -limit to +limit. pose_goal (bool): When speed_goal is True, if pose_goal is True, speed and everything else is put into observations, instead of aux_achieved, so only pose is in aux_achieved. + yaw_and_speed (bool): When speed_goal is True, and not pose_goal, if yaw_and_speed is True, + will include yaw and x-y speed into goal, nothing else. use_aux_achieved (bool): if True, pull out speed, pose dimensions into a separate field: aux_achieved. Only valid when goal_conditioned is True. xy_only_aux (bool): exclude irrelevant dimensions (z-axis movements) from @@ -293,6 +296,7 @@ def __init__(self, self._speed_goal = speed_goal self._speed_goal_limit = speed_goal_limit self._pose_goal = pose_goal + self._yaw_and_speed = yaw_and_speed self._use_aux_achieved = use_aux_achieved self._xy_only_aux = xy_only_aux self._multi_dim_reward = multi_dim_reward @@ -464,6 +468,8 @@ def _get_goal_dist(self, goal): aux = self._get_agent_aux_dims() if self._pose_goal: aux = aux[-1:] # yaw + elif self._yaw_and_speed: + aux = np.concatenate((aux[:2], aux[-1:]), axis=0) _loc = np.concatenate((_loc, aux), axis=0) dist = np.linalg.norm(_loc - _goal_loc) @@ -678,6 +684,8 @@ def _move_goal(self, goal, agent_loc, agent_dir, move_distractions=True): [xspeed, yspeed, 0, 0, 0, yawspeed, 0, 0, 0, yaw]) if self._pose_goal: self._aux_desired = np.array([yaw]) + elif self._yaw_and_speed: + self._aux_desired = np.array([xspeed, yspeed, yaw]) loc, dist = self._move_obj( obj=goal, agent_loc=agent_loc, @@ -827,6 +835,10 @@ def generate_goal_conditioned_obs(self, agent): obs['observation'] = np.concatenate( (obs['observation'], aux[:-1]), axis=0) aux = aux[-1:] # yaw + elif self._yaw_and_speed: + obs['observation'] = np.concatenate( + (obs['observation'], aux[2:-1]), axis=0) + aux = np.concatenate((aux[:2], aux[-1:]), axis=0) obs['achieved_goal'] = np.concatenate((obs['achieved_goal'], aux), axis=0) obs['desired_goal'] = np.concatenate( From 9f13d9816c80ee63220d4c1c797413be07f1a1b2 Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Tue, 11 Oct 2022 13:13:04 -0700 Subject: [PATCH 52/53] goal conditioned dense reward for goal task --- python/social_bot/tasks.py | 44 +++++++++++++++++++++++++++++--------- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/python/social_bot/tasks.py b/python/social_bot/tasks.py index 7d29b0c1..b8e40614 100644 --- a/python/social_bot/tasks.py +++ b/python/social_bot/tasks.py @@ -178,6 +178,7 @@ def __init__(self, 'coke_can', 'table', 'car_wheel', 'plastic_cup', 'beer' ], goal_conditioned=False, + goal_reward=1., speed_goal=False, speed_goal_limit=1.6, pose_goal=False, @@ -222,6 +223,7 @@ def __init__(self, goal_name (string): name of the goal in the world distraction_list (list of string): a list of model. the model shoud be in gazebo database goal_conditioned (bool): if True, each step has -1 reward, unless at goal state, which gives 0. + goal_reward (float): magnitude of the reward for reaching goal during dense reward setting. speed_goal (bool): if True, use speed pose etc. together with position as part of goal. speed_goal_limit (float): randomly sample speed goal in the range: -limit to +limit. pose_goal (bool): When speed_goal is True, if pose_goal is True, speed and everything else is put @@ -293,6 +295,7 @@ def __init__(self, env=env, max_steps=max_steps, reward_weight=reward_weight) self._goal_name = goal_name self._goal_conditioned = goal_conditioned + self._goal_reward = goal_reward self._speed_goal = speed_goal self._speed_goal_limit = speed_goal_limit self._pose_goal = pose_goal @@ -525,15 +528,21 @@ def _run_one_goal(self, goal, move_distractions=True): if dist < self._success_distance_thresh and self._within_angle( dot): # within 45 degrees of the agent direction - reward = 1.0 - distraction_penalty + reward = 0 self._push_reward_queue(max(reward, 0)) if self._goal_conditioned: - reward -= 1. + if not self._sparse_reward: + dense_reward = ( + self._prev_dist - dist) / self._initial_dist + reward = self._goal_reward + dense_reward if self._multi_dim_reward: - rewards = np.array([0, -distraction_penalty]) + rewards = np.array([reward, -distraction_penalty]) + reward -= distraction_penalty else: + reward = self._goal_reward - distraction_penalty if self._multi_dim_reward: - rewards = np.array([1, -distraction_penalty]) + rewards = np.array( + [self._goal_reward, -distraction_penalty]) logging.debug("yielding reward: " + str(reward)) logging.debug("at location: %s, aux: %s", a_loc.astype('|S5'), self._get_agent_aux_dims().astype('|S5')) @@ -575,7 +584,7 @@ def _run_one_goal(self, goal, move_distractions=True): reward = (self._prev_dist - dist) / self._initial_dist if self._multi_dim_reward: rewards = np.array([reward, -distraction_penalty]) - reward = reward - distraction_penalty + reward -= distraction_penalty done = False if distraction_penalty > 0: logging.debug("yielding reward: " + str(reward)) @@ -587,7 +596,7 @@ def _run_one_goal(self, goal, move_distractions=True): sentence=self._goal_name, done=done, rewards=rewards) - reward = -1.0 + dist, dot, loc, agent_dir = self._get_goal_dist(goal) distraction_penalty, prev_min_dist_to_distraction = ( self._get_distraction_penalty(loc, dot, @@ -597,16 +606,31 @@ def _run_one_goal(self, goal, move_distractions=True): success = False if dist < self._success_distance_thresh and self._within_angle(dot): success = True - reward = 1.0 - distraction_penalty + reward = 0 self._push_reward_queue(max(reward, 0)) if self._goal_conditioned: - reward -= 1. + if not self._sparse_reward: + dense_reward = ( + self._prev_dist - dist) / self._initial_dist + reward = self._goal_reward + dense_reward if self._multi_dim_reward: - rewards = np.array([0, -distraction_penalty]) + rewards = np.array([reward, -distraction_penalty]) + reward -= distraction_penalty else: + reward = self._goal_reward - distraction_penalty if self._multi_dim_reward: - rewards = np.array([1, -distraction_penalty]) + rewards = np.array( + [self._goal_reward, -distraction_penalty]) else: + if self._sparse_reward: + reward = 0 + if self._goal_conditioned: + reward = -1 + else: + reward = (self._prev_dist - dist) / self._initial_dist + if self._multi_dim_reward: + rewards = np.array([reward, -distraction_penalty]) + reward -= distraction_penalty self._push_reward_queue(0) logging.debug("took more than {} steps".format( str(self._max_steps))) From 87294068adb99e6d6622733b28c383f88d24d87b Mon Sep 17 00:00:00 2001 From: Le Horizon Date: Wed, 12 Oct 2022 10:30:17 -0700 Subject: [PATCH 53/53] allow position and speed goal multiplier in GoalTask to be roughly at the same scale as yaw. It effectively reduces the goal threshold, making the goals harder to achieve. --- python/social_bot/tasks.py | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/python/social_bot/tasks.py b/python/social_bot/tasks.py index b8e40614..7f56c67c 100644 --- a/python/social_bot/tasks.py +++ b/python/social_bot/tasks.py @@ -183,6 +183,7 @@ def __init__(self, speed_goal_limit=1.6, pose_goal=False, yaw_and_speed=False, + pos_goal_multiplier=0, use_aux_achieved=False, xy_only_aux=False, multi_dim_reward=False, @@ -230,6 +231,8 @@ def __init__(self, into observations, instead of aux_achieved, so only pose is in aux_achieved. yaw_and_speed (bool): When speed_goal is True, and not pose_goal, if yaw_and_speed is True, will include yaw and x-y speed into goal, nothing else. + pos_goal_multiplier (float): multiplier to apply on x-y position dimension before computing goal distance. + This is to make sure the effect of the x-y is similar as other dimensions of the goal, especially yaw. use_aux_achieved (bool): if True, pull out speed, pose dimensions into a separate field: aux_achieved. Only valid when goal_conditioned is True. xy_only_aux (bool): exclude irrelevant dimensions (z-axis movements) from @@ -300,6 +303,7 @@ def __init__(self, self._speed_goal_limit = speed_goal_limit self._pose_goal = pose_goal self._yaw_and_speed = yaw_and_speed + self._pos_goal_multiplier = pos_goal_multiplier self._use_aux_achieved = use_aux_achieved self._xy_only_aux = xy_only_aux self._multi_dim_reward = multi_dim_reward @@ -467,8 +471,14 @@ def _get_goal_dist(self, goal): _goal_loc = goal_loc.copy() _loc = np.array(loc).copy() if self._speed_goal: - _goal_loc = np.concatenate((_goal_loc, self._aux_desired), axis=0) - aux = self._get_agent_aux_dims() + aux_g = self._aux_desired.copy() + aux = self._get_agent_aux_dims().copy() + if self._pos_goal_multiplier > 0: + _loc *= self._pos_goal_multiplier + _goal_loc *= self._pos_goal_multiplier + aux_g[:2] *= self._pos_goal_multiplier * 2 + aux[:2] *= self._pos_goal_multiplier * 2 + _goal_loc = np.concatenate((_goal_loc, aux_g), axis=0) if self._pose_goal: aux = aux[-1:] # yaw elif self._yaw_and_speed: