diff --git a/README.md b/README.md
index 4f6b3f64..17a3fa88 100644
--- a/README.md
+++ b/README.md
@@ -19,6 +19,11 @@ You also need to install the following packages:
 apt install python3-tk
 ```
 
+## Download
+```bash
+git clone --recursive https://github.com/HorizonRobotics/SocialRobot
+```
+
 ## To compile
 ```bash
 cd REPO_ROOT
@@ -27,7 +32,7 @@ mkdir build
 cd build
 cmake ..
 make -j
-cd REPO_ROOT
+cd ..  # REPO_ROOT
 pip3 install -e .
 ```
 If `cmake ..` complains that it cannot find the configuration file provided by "gazebo", run
@@ -165,6 +170,22 @@ It's not so easy, for some task you may need some trials to complete it. An exam
 ### python
 You need to make sure the python you use matches the python found by cmake. You can check this by looking at REPO_ROOT/build/CMakeCache.txt
 
+### Python config cmake errors
+```
+CMake Error at pybind11/tools/FindPythonLibsNew.cmake:95 (message):
+  Python config failure:
+
+Call Stack (most recent call first):
+  pybind11/tools/pybind11Tools.cmake:16 (find_package)
+  pybind11/CMakeLists.txt:33 (include)
+```
+
+Remove the existing build files and build again:
+```bash
+cd REPO_ROOT
+rm -rf build
+```
+
 ### display
 You need to make sure your `DISPLAY` environment variable points to a valid display, otherwise the camera sensor cannot be created and you will see the following error:
diff --git a/pygazebo/pygazebo.cc b/pygazebo/pygazebo.cc
index cb94ce7c..faccf1e4 100644
--- a/pygazebo/pygazebo.cc
+++ b/pygazebo/pygazebo.cc
@@ -22,6 +22,7 @@
 #include
 #include
 #include
+#include <ignition/math/Rand.hh>
 #include   // NOLINT
 #include
 
@@ -469,6 +470,7 @@ class World {
 void Initialize(const std::vector<std::string>& args,
                 int port = 0,
+                int seed = -1,
                 bool quiet = false) {
   if (port != 0) {
     std::string uri = "localhost:" + std::to_string(port);
@@ -477,6 +479,9 @@ void Initialize(const std::vector<std::string>& args,
   if (!gazebo_initialized) {
     gazebo::common::Console::SetQuiet(quiet);
     gazebo::setupServer(args);
+    if (seed >= 0) {
+      ignition::math::Rand::Seed((unsigned int)seed);
+    }
     // gazebo::runWorld uses World::RunLoop(). RunLoop() starts LogWorker()
     // every time. LogWorker will always store something in the buffer when
     // it is started. But we don't have a running LogRecord to collect all
@@ -568,6 +573,7 @@ PYBIND11_MODULE(pygazebo, m) {
       "Initialize",
       py::arg("args") = std::vector<std::string>(),
       py::arg("port") = 0,
+      py::arg("seed") = -1,
      py::arg("quiet") = false);
 
   m.def("close", &Close, "Shutdown everything of gazebo");
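For reference, a minimal usage sketch of the new `seed` argument exposed by the binding. The import path `social_bot.pygazebo` follows the call site in `gazebo_base.py` below; the port and seed values are placeholders.

```python
# Hypothetical usage of the new `seed` argument (import path assumed from
# gazebo_base.py). Seeding ignition's RNG before the server starts makes the
# physics-side randomness reproducible; a negative seed keeps Gazebo's default.
import social_bot.pygazebo as gazebo

gazebo.initialize(port=11345, seed=123, quiet=True)
```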
diff --git a/python/social_bot/envs/gazebo_base.py b/python/social_bot/envs/gazebo_base.py
index 4642a30d..f284b85f 100644
--- a/python/social_bot/envs/gazebo_base.py
+++ b/python/social_bot/envs/gazebo_base.py
@@ -43,6 +43,8 @@ def __init__(self,
                  world_config=None,
                  sim_time_precision=0.001,
                  port=None,
+                 seed=-1,
+                 use_port_in_gz_seed=True,
                  quiet=False):
         """
         Args:
@@ -52,6 +54,10 @@ def __init__(self,
                 see `_modify_world_xml` for details
             sim_time_precision (float): the time precision of the simulator
             port (int): Gazebo port
+            seed (int): random seed for Gazebo; -1 means use the default (the PID)
+            use_port_in_gz_seed (bool): if True, add the port to the Gazebo seed.
+                Set to False when debugging repeatability; set to True when
+                running several envs in parallel during regular training.
             quiet (bool): set quiet output
         """
         os.environ["GAZEBO_MODEL_DATABASE_URI"] = ""
@@ -61,12 +67,17 @@ def __init__(self,
         # This avoids different parallel simulations having the same randomness.
         # When calling from alf, alf.environments.utils.create_environment calls
         # env.seed() afterwards, which is the real seed being used. Not this one.
-        random.seed(port)
+        _seed = port
+        if seed >= 0:
+            _seed = seed
+            if use_port_in_gz_seed:
+                _seed += port
+        random.seed(_seed)
         self._rendering_process = None
         self._rendering_camera = None
         # the default camera pose for rendering rgb_array; can be overridden
         self._rendering_cam_pose = "10 -10 6 0 0.4 2.4"
-        gazebo.initialize(port=port, quiet=quiet)
+        gazebo.initialize(port=port, seed=_seed, quiet=quiet)
 
         if world_file:
             world_file_abs_path = os.path.join(social_bot.get_world_dir(),
diff --git a/python/social_bot/envs/play_ground.py b/python/social_bot/envs/play_ground.py
index 68ef5f0b..2f63bd4b 100644
--- a/python/social_bot/envs/play_ground.py
+++ b/python/social_bot/envs/play_ground.py
@@ -36,7 +36,7 @@
 from social_bot.envs.gazebo_base import GazeboEnvBase
 from social_bot.teacher import TaskGroup
 from social_bot.teacher import TeacherAction
-from social_bot.tasks import GoalTask, ICubAuxiliaryTask, KickingBallTask, Reaching3D
+from social_bot.tasks import GoalTask, PushReachTask, ICubAuxiliaryTask, KickingBallTask, Reaching3D
 
 
 @gin.configurable
@@ -173,7 +173,7 @@ def __init__(self,
         self._teacher = teacher.Teacher(task_groups_exclusive=False)
         self._has_goal_task = False
         for task in tasks:
-            if task == GoalTask:
+            if task in (GoalTask, PushReachTask):
                 self._has_goal_task = True
             task_group = TaskGroup()
             task_group.add_task(task(env=self, max_steps=max_steps))
diff --git a/python/social_bot/models/pioneer2dx_noplugin_ghost/model-ghost.sdf b/python/social_bot/models/pioneer2dx_noplugin_ghost/model-ghost.sdf
new file mode 100644
index 00000000..de649101
--- /dev/null
+++ b/python/social_bot/models/pioneer2dx_noplugin_ghost/model-ghost.sdf
@@ -0,0 +1,102 @@
+<?xml version="1.0"?>
+<sdf version="1.6">
+  <model name="pioneer2dx_noplugin_ghost">
+    <static>true</static>
+    <link name="chassis">
+      <pose>0 0 0.16 0 0 0</pose>
+      <inertial>
+        <mass>5.67</mass>
+        <inertia>
+          <ixx>0.07</ixx>
+          <iyy>0.08</iyy>
+          <izz>0.10</izz>
+          <ixy>0</ixy>
+          <ixz>0</ixz>
+          <iyz>0</iyz>
+        </inertia>
+      </inertial>
+      <visual name="visual">
+        <pose>0 0 0.04 0 0 0</pose>
+        <geometry>
+          <mesh>
+            <uri>model://pioneer2dx/meshes/chassis.dae</uri>
+          </mesh>
+        </geometry>
+        <transparency>0.7</transparency>
+      </visual>
+      <visual name="castor_visual">
+        <transparency>0.7</transparency>
+        <pose>-0.200 0 -0.12 0 0 0</pose>
+        <geometry>
+          <sphere>
+            <radius>0.04</radius>
+          </sphere>
+        </geometry>
+      </visual>
+    </link>
+    <link name="right_wheel">
+      <pose>0.1 -.17 0.11 0 1.5707 1.5707</pose>
+      <inertial>
+        <mass>1.5</mass>
+        <inertia>
+          <ixx>0.0051</ixx>
+          <iyy>0.0051</iyy>
+          <izz>0.0090</izz>
+          <ixy>0</ixy>
+          <ixz>0</ixz>
+          <iyz>0</iyz>
+        </inertia>
+      </inertial>
+      <visual name="visual">
+        <transparency>0.7</transparency>
+        <geometry>
+          <cylinder>
+            <radius>0.11</radius>
+            <length>0.05</length>
+          </cylinder>
+        </geometry>
+      </visual>
+    </link>
+    <link name="left_wheel">
+      <pose>0.1 .17 0.11 0 1.5707 1.5707</pose>
+      <inertial>
+        <mass>1.5</mass>
+        <inertia>
+          <ixx>0.0051</ixx>
+          <iyy>0.0051</iyy>
+          <izz>0.0090</izz>
+          <ixy>0</ixy>
+          <ixz>0</ixz>
+          <iyz>0</iyz>
+        </inertia>
+      </inertial>
+      <visual name="visual">
+        <transparency>0.7</transparency>
+        <geometry>
+          <cylinder>
+            <radius>0.11</radius>
+            <length>0.05</length>
+          </cylinder>
+        </geometry>
+      </visual>
+    </link>
+  </model>
+</sdf>
diff --git a/python/social_bot/models/pioneer2dx_noplugin_ghost/model.config b/python/social_bot/models/pioneer2dx_noplugin_ghost/model.config
new file mode 100644
index 00000000..71cee123
--- /dev/null
+++ b/python/social_bot/models/pioneer2dx_noplugin_ghost/model.config
@@ -0,0 +1,21 @@
+<?xml version="1.0"?>
+<model>
+  <name>Pioneer 2DX Ghost</name>
+  <version>1.0</version>
+  <sdf version="1.6">model-ghost.sdf</sdf>
+
+  <author>
+    <name>Nate Koenig</name>
+    <email>nate@osrfoundation.org</email>
+  </author>
+
+  <description>
+    A model of the pioneer 2 dx robot. This is the same as
+    https://bitbucket.org/osrf/gazebo_models/src/default/pioneer2dx/
+    with the plugin removed.
+  </description>
+</model>
diff --git a/python/social_bot/models/target_ball/model.sdf b/python/social_bot/models/target_ball/model.sdf
index 7e5a742a..0e80a542 100644
--- a/python/social_bot/models/target_ball/model.sdf
+++ b/python/social_bot/models/target_ball/model.sdf
@@ -8,7 +8,7 @@
-          <radius>0.03</radius>
+          <radius>0.1</radius>
diff --git a/python/social_bot/models/wood_cube_30cm_without_offset/model.config b/python/social_bot/models/wood_cube_30cm_without_offset/model.config
new file mode 100644
index 00000000..66ed48ec
--- /dev/null
+++ b/python/social_bot/models/wood_cube_30cm_without_offset/model.config
@@ -0,0 +1,18 @@
+<?xml version="1.0"?>
+<model>
+  <name>Wood cube 30cm</name>
+  <version>1.0</version>
+  <sdf version="1.5">model.sdf</sdf>
+
+  <author>
+    <name>Nate Koenig</name>
+    <email>nate@osrfoundation.org</email>
+  </author>
+
+  <description>
+    A 30x30x30cm wooden cube modified from the 10x10x10cm wooden cube.
+    The 5cm height offset of the internal link has been removed from the
+    original version. Also reduced the mass by 0.3 and the friction to 1/2.
+  </description>
+</model>
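The seeding rule added to `GazeboEnvBase.__init__` above is easy to mis-read, so here it is pulled out as a pure function with its three cases checked; this is an illustration only, not code from the patch.

```python
# Illustration of the seed-combination rule from GazeboEnvBase.__init__ above.
def combine_seed(port, seed=-1, use_port_in_gz_seed=True):
    _seed = port           # default: derive randomness from the port
    if seed >= 0:
        _seed = seed       # an explicit seed overrides the default
        if use_port_in_gz_seed:
            _seed += port  # keep parallel envs on distinct random streams
    return _seed

assert combine_seed(port=11345) == 11345
assert combine_seed(port=11345, seed=7) == 11352
assert combine_seed(port=11345, seed=7, use_port_in_gz_seed=False) == 7
```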
diff --git a/python/social_bot/models/wood_cube_30cm_without_offset/model.sdf b/python/social_bot/models/wood_cube_30cm_without_offset/model.sdf
new file mode 100644
index 00000000..10db372c
--- /dev/null
+++ b/python/social_bot/models/wood_cube_30cm_without_offset/model.sdf
@@ -0,0 +1,63 @@
+<?xml version="1.0"?>
+<sdf version="1.5">
+  <model name="wood_cube_30cm_without_offset">
+    <link name="link">
+      <inertial>
+        <mass>0.2167</mass>
+        <inertia>
+          <ixx>0.0008611666666666669</ixx>
+          <ixy>0</ixy>
+          <ixz>0</ixz>
+          <iyy>0.0008611666666666669</iyy>
+          <iyz>0</iyz>
+          <izz>0.0008611666666666669</izz>
+        </inertia>
+      </inertial>
+      <collision name="collision">
+        <geometry>
+          <box>
+            <size>0.3 0.3 0.3</size>
+          </box>
+        </geometry>
+        <surface>
+          <contact>
+            <poissons_ratio>0.347</poissons_ratio>
+            <elastic_modulus>8.8e+09</elastic_modulus>
+            <ode>
+              <kp>100000</kp>
+              <kd>100</kd>
+              <max_vel>100.0</max_vel>
+              <min_depth>0.001</min_depth>
+            </ode>
+          </contact>
+          <friction>
+            <torsional>
+              <coefficient>0.5</coefficient>
+              <use_patch_radius>0</use_patch_radius>
+              <surface_radius>0.01</surface_radius>
+            </torsional>
+          </friction>
+        </surface>
+      </collision>
+      <visual name="visual">
+        <geometry>
+          <box>
+            <size>0.3 0.3 0.3</size>
+          </box>
+        </geometry>
+      </visual>
+    </link>
+  </model>
+</sdf>
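As a reference point for inertial blocks like the one above, the diagonal inertia of a uniform box is `I_xx = m (l_y^2 + l_z^2) / 12`. The sketch below is illustrative only; it does not reproduce the hand-picked values in the SDF, which the model description says were adapted from the 10cm cube rather than recomputed.

```python
# Standard uniform-box inertia (illustration; not how the SDF values above
# were chosen).
def box_inertia_diag(mass, lx, ly, lz):
    return (mass * (ly**2 + lz**2) / 12.0,
            mass * (lx**2 + lz**2) / 12.0,
            mass * (lx**2 + ly**2) / 12.0)

ixx, iyy, izz = box_inertia_diag(0.2167, 0.3, 0.3, 0.3)
print(ixx, iyy, izz)  # ~0.00325 each for a uniform 30cm cube of this mass
```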
diff --git a/python/social_bot/tasks.py b/python/social_bot/tasks.py
index 9512683d..7f56c67c 100644
--- a/python/social_bot/tasks.py
+++ b/python/social_bot/tasks.py
@@ -173,24 +173,33 @@ class GoalTask(Task):
     def __init__(self,
                  env,
                  max_steps,
-                 goal_name="ball",
+                 goal_name="pioneer2dx_noplugin_ghost",
                  distraction_list=[
                      'coke_can', 'table', 'car_wheel', 'plastic_cup', 'beer'
                  ],
                  goal_conditioned=False,
+                 goal_reward=1.,
+                 speed_goal=False,
+                 speed_goal_limit=1.6,
+                 pose_goal=False,
+                 yaw_and_speed=False,
+                 pos_goal_multiplier=0,
                  use_aux_achieved=False,
                  xy_only_aux=False,
                  multi_dim_reward=False,
                  end_on_hitting_distraction=False,
                  end_episode_after_success=False,
                  reset_time_limit_on_success=True,
+                 chain_task_rate=0,
                  success_distance_thresh=0.5,
                  fail_distance_thresh=2.0,
                  distraction_penalty_distance_thresh=0,
                  distraction_penalty=0.5,
+                 random_agent_position=False,
                  random_agent_orientation=False,
                  sparse_reward=True,
                  random_range=5.0,
+                 min_distance=0,
                  polar_coord=True,
                  random_goal=False,
                  use_curriculum_training=False,
@@ -215,6 +224,15 @@ def __init__(self,
             goal_name (string): name of the goal in the world
             distraction_list (list of string): a list of models. Each model should be in the Gazebo database
             goal_conditioned (bool): if True, each step has -1 reward, unless at goal state, which gives 0.
+            goal_reward (float): magnitude of the reward for reaching the goal in the dense reward setting.
+            speed_goal (bool): if True, include speed, pose, etc. together with position as part of the goal.
+            speed_goal_limit (float): randomly sample the speed goal in the range -limit to +limit.
+            pose_goal (bool): when speed_goal is True, if pose_goal is True, speed and everything else is
+                put into observations instead of aux_achieved, so only pose is in aux_achieved.
+            yaw_and_speed (bool): when speed_goal is True and pose_goal is False, if yaw_and_speed is True,
+                only yaw and x-y speed are included in the goal, nothing else.
+            pos_goal_multiplier (float): multiplier applied to the x-y position dimensions before computing
+                the goal distance, so that the effect of x-y is comparable to the other goal dimensions,
+                especially yaw.
             use_aux_achieved (bool): if True, pull out speed, pose dimensions into a separate
                 field: aux_achieved. Only valid when goal_conditioned is True.
             xy_only_aux (bool): exclude irrelevant dimensions (z-axis movements) from
@@ -225,6 +243,10 @@ def __init__(self,
             end_on_hitting_distraction (bool): whether to end the episode on hitting a distraction
             reset_time_limit_on_success (bool): if not ending after success and success is hit
                 before the time limit, reset the clock to 0.
+            chain_task_rate (float): if positive, with this probability the current task is chained together
+                with the next task: after one task finishes, the episode doesn't end but moves on to the
+                second goal/task. If the first task isn't achieved within max_steps, the episode ends.
+                When the first task is achieved, step_type is MID and no goal reward is given; reward is
+                given only after achieving all chained tasks.
             success_distance_thresh (float): the goal is reached if it's within this distance to the
                 agent
             fail_distance_thresh (float): if the agent moves away from the goal more than this
                 distance, it's considered a failure and is given reward -1
@@ -233,11 +255,14 @@ def __init__(self,
                 facing it is considered hitting a distraction)
             distraction_penalty (float): positive float of how much to penalize getting too close
                 to distraction objects
+            random_agent_position (bool): whether to randomize the position of the agent at the
+                beginning of an episode.
             random_agent_orientation (bool): whether to randomize the orientation (yaw) of the agent
                 at the beginning of an episode.
             sparse_reward (bool): if true, the reward is -1/0/1, otherwise the 0 case is replaced with
                 the normalized distance by which the agent got closer to the goal.
             random_range (float): the goal's random position range
+            min_distance (float): the goal must be at least this distance away from avoided locations.
+                If this is smaller than success_distance_thresh, success_distance_thresh is used instead.
             polar_coord (bool): use cartesian coordinates in random_range, otherwise use polar coord.
             random_goal (bool): if True, the teacher will randomly select a goal from the object list
                 each episode
             use_curriculum_training (bool): when true, use curriculum in goal task training
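A hedged sketch of how these new options might be combined when constructing the task directly; `env` is a placeholder, and in this repo the task is normally instantiated by `PlayGround` or configured through gin rather than built by hand.

```python
# Hypothetical direct construction; `env` stands in for a PlayGround-style env.
task = GoalTask(
    env=env,
    max_steps=100,
    speed_goal=True,           # goal includes desired velocities / yaw
    speed_goal_limit=1.6,      # speeds sampled uniformly in [-1.6, 1.6]
    pos_goal_multiplier=0.5,   # re-weight x-y so yaw dims aren't drowned out
    chain_task_rate=0.3,       # 30% of successes roll straight into a 2nd goal
    min_distance=1.0)          # goals spawn >= 1 m from avoided locations
```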
@@ -273,12 +298,19 @@ def __init__(self,
             env=env, max_steps=max_steps, reward_weight=reward_weight)
         self._goal_name = goal_name
         self._goal_conditioned = goal_conditioned
+        self._goal_reward = goal_reward
+        self._speed_goal = speed_goal
+        self._speed_goal_limit = speed_goal_limit
+        self._pose_goal = pose_goal
+        self._yaw_and_speed = yaw_and_speed
+        self._pos_goal_multiplier = pos_goal_multiplier
         self._use_aux_achieved = use_aux_achieved
         self._xy_only_aux = xy_only_aux
         self._multi_dim_reward = multi_dim_reward
         self.end_on_hitting_distraction = end_on_hitting_distraction
         self._end_episode_after_success = end_episode_after_success
         self._reset_time_limit_on_success = reset_time_limit_on_success
+        self._chain_task_rate = chain_task_rate
         self._success_distance_thresh = success_distance_thresh
         self._fail_distance_thresh = fail_distance_thresh
         self._distraction_penalty_distance_thresh = distraction_penalty_distance_thresh
@@ -286,6 +318,7 @@ def __init__(self,
             assert distraction_penalty_distance_thresh < success_distance_thresh
         self._distraction_penalty = distraction_penalty
         self._sparse_reward = sparse_reward
+        self._random_agent_position = random_agent_position
         self._random_agent_orientation = random_agent_orientation
         self._use_curriculum_training = use_curriculum_training
         self._curriculum_distractions = curriculum_distractions
@@ -299,14 +332,16 @@ def __init__(self,
         if random_goal and goal_name not in distraction_list:
             distraction_list.append(goal_name)
         self._distraction_list = distraction_list
-        self._object_list = distraction_list
-        if goal_name and goal_name not in distraction_list:
+        self._object_list = distraction_list.copy()
+        if goal_name and goal_name not in self._object_list:
             self._object_list.append(goal_name)
         self._goals = self._object_list
         self._move_goal_during_episode = move_goal_during_episode
         self._success_with_angle_requirement = success_with_angle_requirement
         if not additional_observation_list:
             additional_observation_list = self._object_list
+        else:
+            self._object_list.extend(additional_observation_list)
         self._additional_observation_list = additional_observation_list
         self._pos_list = list(
             itertools.product(
@@ -334,8 +369,11 @@ def __init__(self,
                 self._reward_thresh_to_increase_range)
         else:
             self._random_range = random_range
+        if min_distance < self._success_distance_thresh:
+            min_distance = self._success_distance_thresh
+        self._min_distance = min_distance
         self._goal_dist = 0.
-        obs_format = "image"
+        obs_format = "image or full_state"
         obs_relative = "ego"
         if use_egocentric_states:
             obs_format = "full_state"
@@ -401,6 +439,8 @@ def _prepare_teacher_action(self,
         goal_dist = 0.
         if rewards is not None:
             rewards = rewards.astype(np.float32)
+        # Only output when the episode is done; otherwise the reward
+        # accumulator in tensorboard may miss it.
         if done:
             goal_dist = self._goal_dist
             # clear self._goal_dist so it is only output once
@@ -416,36 +456,80 @@ def _prepare_teacher_action(self,
     def _within_angle(self, dot):
         return (not self._success_with_angle_requirement) or dot > 0.707
 
+    def _get_agent_aux_dims(self, agent_pose=None, agent_vel=None):
+        if agent_pose is None:
+            agent_pose = np.array(self._agent.get_pose()).flatten()
+        if agent_vel is None:
+            agent_vel = np.array(self._agent.get_velocities()).flatten()
+        # 0, 1, 2: vel; 3, 4, 5: angular vel; 6: z position; 7, 8, 9: roll pitch yaw
+        return np.concatenate((agent_vel, agent_pose[2:]), axis=0)
+
     def _get_goal_dist(self, goal):
         loc, agent_dir = self._get_agent_loc()
         goal_loc, _ = goal.get_pose()
         goal_loc = np.array(goal_loc)
-        dist = np.linalg.norm(loc - goal_loc)
+        _goal_loc = goal_loc.copy()
+        _loc = np.array(loc).copy()
+        if self._speed_goal:
+            aux_g = self._aux_desired.copy()
+            aux = self._get_agent_aux_dims().copy()
+            if self._pos_goal_multiplier > 0:
+                _loc *= self._pos_goal_multiplier
+                _goal_loc *= self._pos_goal_multiplier
+                aux_g[:2] *= self._pos_goal_multiplier * 2
+                aux[:2] *= self._pos_goal_multiplier * 2
+            _goal_loc = np.concatenate((_goal_loc, aux_g), axis=0)
+            if self._pose_goal:
+                aux = aux[-1:]  # yaw
+            elif self._yaw_and_speed:
+                aux = np.concatenate((aux[:2], aux[-1:]), axis=0)
+            _loc = np.concatenate((_loc, aux), axis=0)
+
+        dist = np.linalg.norm(_loc - _goal_loc)
         # dir from get_pose is (roll, pitch, yaw)
         dir = np.array([math.cos(agent_dir[2]), math.sin(agent_dir[2])])
         goal_dir = (goal_loc[0:2] - loc[0:2]) / dist
         dot = sum(dir * goal_dir)
-        return dist, dot, loc
+        return dist, dot, loc, agent_dir
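A standalone sketch of the weighted goal distance computed in `_get_goal_dist` above, with made-up numbers and the 10-dim aux layout documented in `_get_agent_aux_dims` (velocities, angular velocities, z, roll, pitch, yaw):

```python
import numpy as np

# Illustration of the weighted distance above (values are made up).
k = 0.5                                    # pos_goal_multiplier
loc = np.array([1.0, 2.0, 0.0]) * k        # agent x-y-z, down-weighted
goal_loc = np.array([2.0, 3.0, 0.0]) * k
aux = np.array([0.3, 0.1, 0, 0, 0, 0.2, 0, 0, 0, 1.1]).copy()
aux_desired = np.array([0.5, 0.0, 0, 0, 0, 0.0, 0, 0, 0, 0.9]).copy()
aux[:2] *= k * 2                           # x-y speeds get twice the weight
aux_desired[:2] *= k * 2
dist = np.linalg.norm(np.concatenate((loc, aux)) -
                      np.concatenate((goal_loc, aux_desired)))
print(dist)
```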
""" agent_sentence = yield self._agent.reset() - if self._random_agent_orientation: + if self._random_agent_orientation or self._random_agent_position: loc, agent_dir = self._agent.get_pose() - self._agent.set_pose((loc, (agent_dir[0], agent_dir[1], - 2 * math.pi * random.random()))) - a_loc, a_dir = self._get_agent_loc() + if self._random_agent_position: + loc = (self._max_play_ground_size * (1 - 2 * random.random()), + self._max_play_ground_size * (1 - 2 * random.random()), + loc[2]) + if self._random_agent_orientation: + agent_dir = (agent_dir[0], agent_dir[1], + 2 * math.pi * random.random()) + self._agent.set_pose((loc, agent_dir)) self._random_move_objects() self.pick_goal() goal = self._world.get_model(self._goal_name) - self._move_goal(goal, a_loc, a_dir) + done = False + gen = self._run_one_goal(goal) + while not done: + action = next(gen) + if (action.done and action.success and self._chain_task_rate > 0 + and random.random() < self._chain_task_rate): + action.done = False + gen = self._run_one_goal(goal, move_distractions=False) + done = action.done + yield action + + def _run_one_goal(self, goal, move_distractions=True): + """Generator function of task feedback.""" + a_loc, a_dir = self._get_agent_loc() + self._move_goal(goal, a_loc, a_dir, move_distractions) steps_since_last_reward = 0 prev_min_dist_to_distraction = 100 rewards = None # reward array in multi_dim_reward case while steps_since_last_reward < self._max_steps: steps_since_last_reward += 1 - dist, dot, loc = self._get_goal_dist(goal) + dist, dot, loc, agent_dir = self._get_goal_dist(goal) distraction_penalty, prev_min_dist_to_distraction = ( self._get_distraction_penalty(loc, dot, prev_min_dist_to_distraction)) @@ -454,16 +538,24 @@ def run(self): if dist < self._success_distance_thresh and self._within_angle( dot): # within 45 degrees of the agent direction - reward = 1.0 - distraction_penalty + reward = 0 self._push_reward_queue(max(reward, 0)) if self._goal_conditioned: - reward -= 1. 
+    def _run_one_goal(self, goal, move_distractions=True):
+        """Generator function of task feedback."""
+        a_loc, a_dir = self._get_agent_loc()
+        self._move_goal(goal, a_loc, a_dir, move_distractions)
         steps_since_last_reward = 0
         prev_min_dist_to_distraction = 100
         rewards = None  # reward array in multi_dim_reward case
         while steps_since_last_reward < self._max_steps:
             steps_since_last_reward += 1
-            dist, dot, loc = self._get_goal_dist(goal)
+            dist, dot, loc, agent_dir = self._get_goal_dist(goal)
             distraction_penalty, prev_min_dist_to_distraction = (
                 self._get_distraction_penalty(loc, dot,
                                               prev_min_dist_to_distraction))
@@ -454,16 +538,24 @@ def run(self):
             if dist < self._success_distance_thresh and self._within_angle(
                     dot):
                 # within 45 degrees of the agent direction
-                reward = 1.0 - distraction_penalty
+                reward = 0
                 self._push_reward_queue(max(reward, 0))
                 if self._goal_conditioned:
-                    reward -= 1.
+                    if not self._sparse_reward:
+                        dense_reward = (
+                            self._prev_dist - dist) / self._initial_dist
+                        reward = self._goal_reward + dense_reward
                     if self._multi_dim_reward:
-                        rewards = np.array([0, -distraction_penalty])
+                        rewards = np.array([reward, -distraction_penalty])
+                    reward -= distraction_penalty
                 else:
+                    reward = self._goal_reward - distraction_penalty
                     if self._multi_dim_reward:
-                        rewards = np.array([1, -distraction_penalty])
+                        rewards = np.array(
+                            [self._goal_reward, -distraction_penalty])
                 logging.debug("yielding reward: " + str(reward))
+                logging.debug("at location: %s, aux: %s", a_loc.astype('|S5'),
+                              self._get_agent_aux_dims().astype('|S5'))
                 done = self._end_episode_after_success
                 agent_sentence = yield self._prepare_teacher_action(
                     reward=reward,
@@ -502,7 +594,7 @@ def run(self):
                     reward = (self._prev_dist - dist) / self._initial_dist
                 if self._multi_dim_reward:
                     rewards = np.array([reward, -distraction_penalty])
-                reward = reward - distraction_penalty
+                reward -= distraction_penalty
                 done = False
                 if distraction_penalty > 0:
                     logging.debug("yielding reward: " + str(reward))
@@ -514,8 +606,8 @@ def run(self):
                 sentence=self._goal_name,
                 done=done,
                 rewards=rewards)
-        reward = -1.0
-        dist, dot, loc = self._get_goal_dist(goal)
+
+        dist, dot, loc, agent_dir = self._get_goal_dist(goal)
         distraction_penalty, prev_min_dist_to_distraction = (
             self._get_distraction_penalty(loc, dot,
                                           prev_min_dist_to_distraction))
@@ -524,20 +616,44 @@ def run(self):
         success = False
         if dist < self._success_distance_thresh and self._within_angle(dot):
             success = True
-            reward = 1.0 - distraction_penalty
+            reward = 0
             self._push_reward_queue(max(reward, 0))
             if self._goal_conditioned:
-                reward -= 1.
+                if not self._sparse_reward:
+                    dense_reward = (
+                        self._prev_dist - dist) / self._initial_dist
+                    reward = self._goal_reward + dense_reward
                 if self._multi_dim_reward:
-                    rewards = np.array([0, -distraction_penalty])
+                    rewards = np.array([reward, -distraction_penalty])
+                reward -= distraction_penalty
             else:
+                reward = self._goal_reward - distraction_penalty
                 if self._multi_dim_reward:
-                    rewards = np.array([1, -distraction_penalty])
+                    rewards = np.array(
+                        [self._goal_reward, -distraction_penalty])
         else:
+            if self._sparse_reward:
+                reward = 0
+                if self._goal_conditioned:
+                    reward = -1
+            else:
+                reward = (self._prev_dist - dist) / self._initial_dist
+            if self._multi_dim_reward:
+                rewards = np.array([reward, -distraction_penalty])
+            reward -= distraction_penalty
             self._push_reward_queue(0)
             logging.debug("took more than {} steps".format(
                 str(self._max_steps)))
-        logging.debug("yielding reward: {}".format(str(reward)))
+        agent_vel = np.array(self._agent.get_velocities()).flatten()
+
+        def _str(arr):
+            res = ["["]
+            res.extend(["{:.2f}".format(e) for e in arr])
+            res.append("]")
+            return " ".join(res)
+
+        logging.debug("yielding reward: %s at\nloc %s pose %s vel %s",
+                      str(reward), _str(loc), _str(agent_dir), _str(agent_vel))
         if self.should_use_curriculum_training():
             logging.debug("reward queue len: {}, sum: {}".format(
                 str(len(self._q)), str(sum(self._q))))
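The dense shaping term used in both success branches above is the per-step progress toward the goal, normalized by the distance at episode start; a tiny sketch with example numbers:

```python
# The dense shaping term above: progress this step, normalized by the
# starting distance, so its sum over an episode is bounded by roughly 1.
def dense_reward(prev_dist, dist, initial_dist):
    return (prev_dist - dist) / initial_dist

# Agent started 4 m away and just closed in from 2.0 m to 1.5 m:
print(dense_reward(2.0, 1.5, 4.0))  # 0.125
```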
""" avoid_locations = [agent_loc] + if not move_distractions: + for item in self._distraction_list: + if item is not self._goal_name: + obj = self._world.get_model(item) + if obj: + distraction_loc = obj.get_pose()[0] + avoid_locations.append(distraction_loc) + if self._speed_goal: + MAX_SPEED = self._speed_goal_limit * 2 + xspeed = (0.5 - random.random()) * MAX_SPEED + yspeed = (0.5 - random.random()) * MAX_SPEED + yawspeed = (0.5 - random.random()) * MAX_SPEED + yaw = np.arctan2( + yspeed, xspeed) # numpy.arctan2 is defined as arctan(x0/x1) + # 0, 1, 2: vel; 3, 4, 5: angular vel; 6: z position; 7, 8, 9: roll pitch yaw + self._aux_desired = np.array( + [xspeed, yspeed, 0, 0, 0, yawspeed, 0, 0, 0, yaw]) + if self._pose_goal: + self._aux_desired = np.array([yaw]) + elif self._yaw_and_speed: + self._aux_desired = np.array([xspeed, yspeed, yaw]) loc, dist = self._move_obj( obj=goal, agent_loc=agent_loc, agent_dir=agent_dir, is_goal=True, - avoid_locations=avoid_locations) + avoid_locations=avoid_locations, + name="goal") self._goal_dist += dist avoid_locations.append(loc) distractions = OrderedDict() - for item in self._distraction_list: - if item is not self._goal_name: - distractions[item] = 1 - if len(distractions) and self._curriculum_distractions: - for item, _ in distractions.items(): - distraction = self._world.get_agent(item) - loc, _ = self._move_obj( - obj=distraction, - agent_loc=agent_loc, - agent_dir=agent_dir, - is_goal=False, - avoid_locations=avoid_locations) - avoid_locations.append(loc) + if move_distractions: + for item in self._distraction_list: + if item is not self._goal_name: + distractions[item] = 1 + if len(distractions) and self._curriculum_distractions: + for item, _ in distractions.items(): + distraction = self._world.get_agent(item) + loc, _ = self._move_obj( + obj=distraction, + agent_loc=agent_loc, + agent_dir=agent_dir, + is_goal=False, + avoid_locations=avoid_locations, + name=item) + avoid_locations.append(loc) def _move_obj(self, obj, agent_loc, agent_dir, is_goal=True, - avoid_locations=[]): + avoid_locations=[], + close_to_agent=False, + name="Unspecified"): if (self.should_use_curriculum_training() and self._percent_full_range_in_curriculum > 0 and random.random() < self._percent_full_range_in_curriculum): @@ -622,9 +764,14 @@ def _move_obj(self, self._is_full_range_in_curriculum = False attempts = 0 dist = range + _min_distance = self._min_distance + _succ_thd = self._success_distance_thresh + if close_to_agent: + _min_distance = 0.3 + _succ_thd = _min_distance while True: attempts += 1 - dist = random.random() * range + dist = random.random() * (range - _succ_thd) + _succ_thd if self._curriculum_target_angle: angle_range = self._random_angle else: @@ -639,28 +786,42 @@ def _move_obj(self, loc = np.asarray((random.random() * range - range / 2, random.random() * range - range / 2, 0)) - self._initial_dist = np.linalg.norm(loc - agent_loc) satisfied = True if (abs(loc[0]) > self._max_play_ground_size or abs(loc[1]) > self._max_play_ground_size): # not within walls satisfied = False for avoid_loc in avoid_locations: - dist = np.linalg.norm(loc - avoid_loc) - if dist < self._success_distance_thresh: + ddist = np.linalg.norm(loc - avoid_loc) + if ddist < _min_distance: satisfied = False break - if satisfied or attempts > 10000: + if satisfied or attempts % 100 == 0 or attempts > 300: if not satisfied: - logging.warning( - "Took forever to find satisfying " + - "object location. 
" + - "agent_loc: {}, range: {}, max_size: {}.".format( - str(agent_loc), str(range), - str(self._max_play_ground_size))) + if attempts <= 300: + _min_distance /= 2. + logging.warning( + "Took {} times to find satisfying {} " + "location. reducing _min_dist to {}".format( + str(attempts), name, str(_min_distance))) + continue + else: + logging.warning( + "Took forever to find satisfying " + + "{} location. " + + "agent_loc: {}, range: {}, _min_dist: {}, max_size" + ": {}.".format(name, str(agent_loc), str(range), + str(_min_distance), + str(self._max_play_ground_size))) + if is_goal: + self._initial_dist = np.linalg.norm(loc - agent_loc) break - self._prev_dist = self._initial_dist + if is_goal: + self._prev_dist = self._initial_dist obj.reset() - obj.set_pose((loc, (0, 0, 0))) + yaw = 0 + if is_goal and self._speed_goal: + yaw = self._aux_desired[-1] + obj.set_pose((loc, (0, 0, yaw))) return loc, dist def _random_move_objects(self, random_range=10.0): @@ -701,20 +862,34 @@ def generate_goal_conditioned_obs(self, agent): obs['observation'] = flat_obs obs['achieved_goal'] = agent_pose[0:2] obs['desired_goal'] = goal_pose[0:2] - if self._use_aux_achieved: - # distraction objects' x, y coordinates + aux = self._get_agent_aux_dims(agent_pose, agent_vel) + if self._speed_goal: obs['observation'] = flat_obs[14:] - obs['aux_achieved'] = np.concatenate((agent_vel, agent_pose[2:]), - axis=0) - if self._xy_only_aux: - # agent speed: 2: z-speed; 3, 4: angular velocities; 5: yaw-vel, - # agent pose: 2: z; 3, 4, 5: roll pitch yaw. + if self._pose_goal: obs['observation'] = np.concatenate( - (agent_vel[2:5], agent_pose[2:5], flat_obs[14:]), axis=0) - obs['aux_achieved'] = np.concatenate( - (agent_vel[0:2], np.expand_dims(agent_vel[5], 0), - np.expand_dims(agent_pose[5], 0)), - axis=0) + (obs['observation'], aux[:-1]), axis=0) + aux = aux[-1:] # yaw + elif self._yaw_and_speed: + obs['observation'] = np.concatenate( + (obs['observation'], aux[2:-1]), axis=0) + aux = np.concatenate((aux[:2], aux[-1:]), axis=0) + obs['achieved_goal'] = np.concatenate((obs['achieved_goal'], aux), + axis=0) + obs['desired_goal'] = np.concatenate( + (obs['desired_goal'], self._aux_desired), axis=0) + elif self._use_aux_achieved: + # distraction objects' x, y coordinates + obs['observation'] = flat_obs[14:] + obs['aux_achieved'] = aux + # if self._xy_only_aux: + # # agent speed: 2: z-speed; 3, 4: angular velocities; 5: yaw-vel, + # # agent pose: 2: z; 3, 4, 5: roll pitch yaw. + # obs['observation'] = np.concatenate( + # (agent_vel[2:5], agent_pose[2:5], flat_obs[14:]), axis=0) + # obs['aux_achieved'] = np.concatenate( + # (agent_vel[0:2], np.expand_dims(agent_vel[5], 0), + # np.expand_dims(agent_pose[5], 0)), + # axis=0) return obs def task_specific_observation(self, agent): @@ -781,6 +956,208 @@ def task_specific_observation(self, agent): return obs +@gin.configurable +class PushReachTask(GoalTask): + def __init__(self, + env, + max_steps, + push_reach=False, + obj_names=['wood_cube_30cm_without_offset'], + goal_names=['goal_indicator'], + distraction_list=['car_wheel'], + multi_dim_reward=True, + close_to_agent=False, + target_relative_to_obj=False, + use_obj_pose=False): + """A Push or Push and Reach task. + + We utilize some of the curriculum, distraction obj handling logic in GoalTask. + + Args: + push_reach (bool): if False, push only, goal positions are for objects to achieve; otherwise, + the first goal position is for the agent to achieve. 
+@gin.configurable
+class PushReachTask(GoalTask):
+    def __init__(self,
+                 env,
+                 max_steps,
+                 push_reach=False,
+                 obj_names=['wood_cube_30cm_without_offset'],
+                 goal_names=['goal_indicator'],
+                 distraction_list=['car_wheel'],
+                 multi_dim_reward=True,
+                 close_to_agent=False,
+                 target_relative_to_obj=False,
+                 use_obj_pose=False):
+        """A Push or Push-and-Reach task.
+
+        We reuse some of the curriculum and distraction-object handling logic
+        from GoalTask.
+
+        Args:
+            push_reach (bool): if False, push only: the goal positions are for the objects
+                to achieve; otherwise, the first goal position is for the agent to achieve.
+            obj_names (list of string): when not empty, the names of the objects to be moved.
+            goal_names (list of string): when not empty, these goal objects indicate the goal
+                locations for the objects in obj_names.
+            close_to_agent (bool): whether to initialize the object to be pushed closer to
+                the agent.
+            target_relative_to_obj (bool): initialize the target position relative to the
+                object instead of the agent location.
+            use_obj_pose (bool): include object auxiliary dimensions in the input.
+        """
+        self._push_reach = push_reach
+        if push_reach:
+            goal_names.append('target_ball')
+        self._obj_names = obj_names
+        self._goal_names = goal_names
+        self._multi_dim_reward = multi_dim_reward
+        self._target_relative_to_obj = target_relative_to_obj
+        self._close_to_agent = close_to_agent
+        self._use_obj_pose = use_obj_pose
+        if not push_reach:
+            assert len(obj_names) == len(goal_names)
+        else:
+            assert len(obj_names) == len(goal_names) - 1
+        super().__init__(
+            env,
+            max_steps,
+            goal_conditioned=True,
+            use_aux_achieved=True,
+            multi_dim_reward=multi_dim_reward,
+            end_episode_after_success=True,
+            goal_name=goal_names[0],
+            distraction_list=distraction_list,
+            additional_observation_list=obj_names + goal_names[1:])
+
+    def _random_move_objects(self, random_range=10.0):
+        pass
+
+    def _move_objs(self,
+                   obj_names,
+                   ap,
+                   ad,
+                   avoids=[],
+                   record_goal_dist=False,
+                   close_to_agent=False,
+                   target_relative_to_obj=False):
+        """Move objects according to the agent location."""
+        obj_positions = avoids
+        if target_relative_to_obj:
+            assert len(avoids) == len(obj_names), "num objects != num targets"
+            orig_obj_pos = avoids.copy()
+
+        if record_goal_dist:
+            self._goal_dist = 0
+        n = 0
+        for obj_name in obj_names:
+            obj = self._world.get_agent(obj_name)
+            start_pos = ap
+            _avoids = obj_positions
+            if target_relative_to_obj:
+                start_pos = orig_obj_pos[n]
+                _avoids = _avoids + [ap]
+            p, dist = self._move_obj(
+                obj,
+                start_pos,
+                ad,
+                avoid_locations=obj_positions,
+                name=obj_name,
+                close_to_agent=close_to_agent)
+            if record_goal_dist:
+                self._goal_dist += dist
+            n += 1
+            obj_positions.append(p)
+        if record_goal_dist:
+            self._goal_dist /= n
+        return obj_positions
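Per the `play_ground.py` change earlier in this patch, `PushReachTask` can be handed to `PlayGround` like any goal task. A hedged construction sketch; the `agent_type` value is an assumption for illustration:

```python
# Hypothetical wiring of the new task into PlayGround; constructor args other
# than `tasks` are assumptions for illustration.
from social_bot.envs.play_ground import PlayGround
from social_bot.tasks import PushReachTask

env = PlayGround(
    agent_type='pioneer2dx_noplugin',  # illustrative agent name
    tasks=[PushReachTask])             # counted as a goal task by PlayGround
obs = env.reset()
```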
+
+    def _move_goals(self, ap, ad, obj_locs=[]):
+        """Move goal locations according to object locations."""
+        # TODO(lezhao): copy obj_locs into avoids and put ap in avoids,
+        # then find goal location based on each obj location.
+        return self._move_objs(
+            self._goal_names,
+            ap,
+            ad,
+            obj_locs,
+            record_goal_dist=True,
+            target_relative_to_obj=self._target_relative_to_obj)
+
+    def _move_distractions(self, ap, ad, avoids=[]):
+        return self._move_objs(self._distraction_list, ap, ad, avoids)
+
+    def _get_obj_loc_pose(self, obj_names):
+        res_pos = np.array([])
+        res_aux = np.array([])
+        for obj_name in obj_names:
+            obj = self._world.get_agent(obj_name)
+            obj_pos, obj_dir = obj.get_pose()
+            # vel = np.array(obj.get_velocities()).flatten()
+            res_pos = np.concatenate((res_pos, obj_pos[0:2]))
+            res_aux = np.concatenate((res_aux, obj_pos[2:], obj_dir))  # , vel
+        return res_pos, res_aux
+
+    def _get_achieved(self):
+        achieved_loc = np.array([])
+        aux_achieved = np.array(self._agent.get_velocities()).flatten()
+        # print("gazebo port: ", self._env._port)
+        # print("gazebo aux_ach: ", aux_achieved, flush=True)
+        ap, ad = self._get_agent_loc()
+        if not self._push_reach:
+            aux_achieved = np.concatenate((aux_achieved, ap, ad))
+        else:
+            achieved_loc = np.concatenate((achieved_loc, ap[0:2]))
+            aux_achieved = np.concatenate((aux_achieved, ap[2:], ad))
+        ach_poss, aux_achs = self._get_obj_loc_pose(self._obj_names)
+        achieved_loc = np.concatenate((achieved_loc, ach_poss))
+        if self._use_obj_pose:
+            aux_achieved = np.concatenate((aux_achieved, aux_achs))
+        return achieved_loc, aux_achieved
+
+    def _get_desired_goal(self):
+        return self._get_obj_loc_pose(self._goal_names)[0]
+
+    def generate_goal_conditioned_obs(self, agent):
+        ach, aux_ach = self._get_achieved()
+        obs = OrderedDict()
+        obs['observation'] = self._get_obj_loc_pose(self._distraction_list)[0]
+        obs['achieved_goal'] = ach
+        obs['desired_goal'] = self._get_desired_goal()
+        obs['aux_achieved'] = aux_ach
+        return obs
+
+    def _compute_reward(self):
+        def l2_dist_close_reward_fn(achieved_goal, goal):
+            return 0 if np.linalg.norm(
+                achieved_goal - goal) < self._success_distance_thresh else -1
+
+        return l2_dist_close_reward_fn(self._get_achieved()[0],
+                                       self._get_desired_goal())
+
+    def _produce_rewards(self, prev_min_dist_to_distraction):
+        ap, ad = self._get_agent_loc()
+        distraction_penalty, prev_min_dist_to_distraction = (
+            self._get_distraction_penalty(ap, 1.,
+                                          prev_min_dist_to_distraction))
+        reward = self._compute_reward()
+        done = reward >= 0
+        if done:
+            distraction_penalty = 0
+        av = np.array(self._agent.get_velocities()).flatten()
+        logging.debug("yielding reward: " + str(reward))
+        logging.debug("at location: %s, aux: %s, vel: %s",
+                      ap.astype('|S5'),
+                      np.array(ad).astype('|S5'), av.astype('|S5'))
+        reward -= distraction_penalty
+        rewards = None
+        if self._multi_dim_reward:
+            rewards = np.array([reward, -distraction_penalty])
+        return self._prepare_teacher_action(
+            reward=reward,
+            sentence="well done",
+            done=done,
+            success=done,
+            rewards=rewards), prev_min_dist_to_distraction
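The success test in `_compute_reward` above is a plain L2 threshold between achieved and desired goals, the usual goal-conditioned (HER-style) 0/-1 predicate; a standalone sketch:

```python
import numpy as np

# Standalone sketch of the 0/-1 goal predicate used by _compute_reward above.
def l2_dist_close_reward_fn(achieved_goal, goal, thresh=0.5):
    return 0 if np.linalg.norm(achieved_goal - goal) < thresh else -1

print(l2_dist_close_reward_fn(np.array([1.0, 2.0]), np.array([1.2, 2.1])))  # 0
print(l2_dist_close_reward_fn(np.array([1.0, 2.0]), np.array([3.0, 2.0])))  # -1
```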
+    def _run_one_goal(self, goal=None, move_distractions=None):
+        # The two params are not used here.
+        ap, ad = self._get_agent_loc()
+        obj_positions = self._move_objs(
+            self._obj_names, ap, ad, close_to_agent=self._close_to_agent)
+        avoids = self._move_goals(ap, ad, obj_positions)
+        avoids = self._move_distractions(ap, ad, avoids)
+        avoids.clear()
+        steps_since_last_reward = 0
+        prev_min_dist_to_distraction = 100
+        while steps_since_last_reward < self._max_steps:
+            steps_since_last_reward += 1
+            (agent_sentence, prev_min_dist_to_distraction
+             ) = self._produce_rewards(prev_min_dist_to_distraction)
+            yield agent_sentence
+        (agent_sentence, prev_min_dist_to_distraction
+         ) = self._produce_rewards(prev_min_dist_to_distraction)
+        agent_sentence.done = True
+        yield agent_sentence
+
+
 @gin.configurable
 class ICubAuxiliaryTask(Task):
     """
diff --git a/python/social_bot/worlds/play_ground.world b/python/social_bot/worlds/play_ground.world
index 1c80a0a8..684849e3 100644
--- a/python/social_bot/worlds/play_ground.world
+++ b/python/social_bot/worlds/play_ground.world
@@ -40,6 +40,13 @@
       <pose>-2 5.7 0 0 -0 0</pose>
     </include>
+    <include>
+      <static>1</static>
+      <uri>model://grey_wall</uri>
+      <pose>2 5.7 0 0 -0 0</pose>
+    </include>
     <include>
       <static>1</static>
@@ -47,6 +54,13 @@
       <uri>model://grey_wall</uri>
       <pose>-2 -5.7 0 0 -0 0</pose>
     </include>
+    <include>
+      <static>1</static>
+      <uri>model://grey_wall</uri>
+      <pose>2 -5.7 0 0 -0 0</pose>
+    </include>
     <include>
       <static>1</static>
@@ -61,5 +75,22 @@
       <uri>model://grey_wall</uri>
       <pose>-5.7 -2 0 0 -0 1.5708</pose>
     </include>
+    <model name="wall">
+      <static>1</static>
+      <pose>5.7 0 0.1 0 0 0</pose>
+      <link name="link">
+        <pose>0 0 0 0 0 0</pose>
+        <collision name="collision">
+          <geometry>
+            <box>
+              <size>0.2 11.4 0.2</size>
+            </box>
+          </geometry>
+        </collision>
+        <visual name="visual">
+          <geometry>
+            <box>
+              <size>0.2 11.4 0.2</size>
+            </box>
+          </geometry>
+        </visual>
+      </link>
+    </model>
   </world>
 </sdf>
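Putting the pieces of this patch together, a hedged end-to-end sketch: a seeded, reproducible `PlayGround` running the new `PushReachTask`. The agent name, port forwarding, and the gym-style step API are assumptions for illustration.

```python
# Hedged end-to-end sketch of the features added in this patch (names and
# forwarded kwargs are assumptions; PlayGround subclasses GazeboEnvBase).
from social_bot.envs.play_ground import PlayGround
from social_bot.tasks import PushReachTask

env = PlayGround(
    agent_type='pioneer2dx_noplugin',  # illustrative agent name
    port=11345,                        # forwarded to GazeboEnvBase / seeding
    tasks=[PushReachTask])
obs = env.reset()
for _ in range(10):
    obs, reward, done, _ = env.step(env.action_space.sample())
    if done:
        obs = env.reset()
env.close()
```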