diff --git a/README.md b/README.md index dd1fa7c..3e58f48 100644 --- a/README.md +++ b/README.md @@ -20,8 +20,9 @@ An environment is defined by: - ActionProcessor: an object of `type BaseAction` that defines functions that process the action. Generally simulator works by applying continous forces/torque, however the desired action space might be discrete or continuous. `BaseAction` objects allow the user to easily switch between different action spaces. - + ### Interactive Mode + To run environments in interactive mode, a function mapping the keyboard inputs into commands supplied to the agent must be defined. This function is typically called, `str2action` in `rlmaster` repository. The environment can be run in interactive mode in the following way: ``` @@ -31,7 +32,9 @@ env.interactive(move_single_env.str2action) ``` You can use the commands, `w, s, d, a` to move the agent and `q` to quit the interactive mode. - +### Stacker in Interactive Mode + +No interface has been developed yet, but after creating an instance of `SimpleStackerSimulator` from `stacker_agent`, calling `_setup_renderer` and `render` will open a display window. You can step by entering an xy position that will move the second block there instantly. Calling `render` again will show the updated positions of the blocks. They are initialized at position (0,0) if the initializer has not been called. 
##Environment in openAI gym format @@ -41,8 +44,7 @@ from core import gym_wrapper env = move_agent.get_environment() gymEnv = gym_wwapper.GymWrapper(env) ``` - - + diff --git a/core/gym_wrapper.py b/core/gym_wrapper.py index 11d38b6..acb490d 100644 --- a/core/gym_wrapper.py +++ b/core/gym_wrapper.py @@ -1,6 +1,10 @@ import numpy as np -from core import base_environment +import rlmaster.core.base_environment as base_environment from gym import spaces +from rllab.spaces.box import Box +from rllab.core.serializable import Serializable +from rllab.misc.overrides import overrides +from rllab.misc import logger class GymWrapper(object): def __init__(self, env): @@ -11,16 +15,15 @@ def __init__(self, env): else: assert isinstance(self.env.action_processor, base_environment.BaseContinuousAction) - self.action_space = spaces.Box( + self.action_space = Box( low = env.action_processor.minval(), high = env.action_processor.maxval(), - shape=(env.action_processor.action_dim(), - env.action_processor.num_actions())) + shape=(env.action_processor.action_dim())) obsNdim = self.env.observation_ndim() - obsKeys = obsNdim.keys() + obsKeys = list(obsNdim.keys()) assert len(obsKeys) == 1, 'gym only supports one observation type' self._obsKey = obsKeys[0] - self.observation_space = spaces.Box(low=0, high=255, shape=obsNdim[obsKeys[0]]) + self.observation_space = Box(low=0, high=255, shape=obsNdim[obsKeys[0]]) @property def frameskip(self): @@ -59,7 +62,7 @@ def _step(self, action): self.env.step(action) obs = self._observation() reward = self.env.reward() - done = False + done = self.env.simulator.done # test? 
return obs, reward, done, dict(reward=reward) @@ -68,7 +71,7 @@ def step(self, action): def viewer_setup(self): - self.env._renderer_setup() + self.env.setup_renderer() def render(self): @@ -77,4 +80,3 @@ def render(self): def _render(self): return self.env.render() - diff --git a/envs/cheetah_agent.py b/envs/cheetah_agent.py new file mode 100644 index 0000000..cc2351b --- /dev/null +++ b/envs/cheetah_agent.py @@ -0,0 +1,135 @@ +from rlmaster.core.base_environment import * +import numpy as np +from overrides import overrides +from rllab.mujoco_py import MjModel, MjViewer + +class HalfCheetahSimulator(BaseSimulator): + def __init__(self, **kwargs): + super(HalfCheetahSimulator, self).__init__(**kwargs) + + self._imSz = 512 + self._im = np.zeros((self._imSz, self._imSz, 3), dtype=np.uint8) + + self.model = MjModel('../rlmaster/envs/mujoco_envs/xmls/half_cheetah.xml') + self.viewer = None + self._pos = {} + self._pos['torso'] = np.zeros((3,)) + self._range_min = -1 + self._range_max = 1 + + self.body_comvel = 0 + self.action = np.zeros((1, 2)) + self.init_qpos = self.model.data.qpos + self.init_qvel = self.model.data.qvel + self.init_qacc = self.model.data.qacc + self.init_ctrl = self.model.data.ctrl + self.frame_skip = 1 + + @overrides + def step(self, ctrl, loop=False): + ctrl = np.clip(ctrl, *(np.array([-1, -1, -1, -1, -1, -1]), np.array([1, 1, 1, 1, 1, 1]))) + self.model.data.ctrl = ctrl # + np.random.normal(size=ctrl.shape) + # print('gym', self.model.data.ctrl) + for i in range(self.frame_skip): + self.model.step() + self.model.forward() + ind = self.model.body_names.index('torso') + self._pos['torso'] = self.model.body_pos[ind] + self.body_comvel = self.model.body_comvels[ind] + self.action = ctrl + + @overrides + def get_image(self): + data, width, height = self.viewer.get_image() + self._im = np.fromstring(data, dtype='uint8').reshape(height, width, 3)[::-1,:,:] + # print(self._im) + return self._im.copy() + + @overrides + def _setup_renderer(self): + 
self.viewer = MjViewer(visible=True, init_width=self._imSz, init_height=self._imSz) + self.viewer.start() + self.viewer.set_model(self.model) + + @overrides + def render(self): + self.viewer.loop_once() + +class CheetahIm(BaseObservation): + + def get_body_com(self, body_name): + idx = self.simulator.model.body_names.index(body_name) + return self.simulator.model.data.com_subtree[idx] + + @overrides + def ndim(self): + dim = {} + dim['im'] = (1, 20) + return dim + + @overrides + def observation(self): + obs = {} + current_obs = np.concatenate([ + self.simulator.model.data.qpos.flatten()[1:], + self.simulator.model.data.qvel.flat, + self.get_body_com('torso').flat, + ]) + obs['im'] = current_obs + return obs + +class RewardCheetah(BaseRewarder): + + @property + def action(self): + return self.prms['sim'].action if hasattr(self.prms['sim'], 'action') else np.zeros((1,2)) + + @property + def body_comvel(self): + return self.prms['sim'].body_comvel if hasattr(self.prms['sim'], 'body_comvel') else 0 + + @overrides + def get(self): + ctrl_cost = 1e-1 * 0.5 * np.sum(np.square(self.action)) + run_cost = -1 * self.body_comvel[0] + return -(ctrl_cost + run_cost) + +class ContinuousCheetahAction(BaseContinuousAction): + @overrides + def action_dim(self): + return 6 + + def minval(self): + return -1.25 + + def maxval(self): + return 1.1 + + @overrides + def process(self, action): + return action + +# TODO(jasmine): fix this so it initializes (is it called at the start of each epoch? 
find out) +class InitCheetah(BaseInitializer): + + def reset_mujoco(self): + self.simulator.model.data.qpos = self.simulator.init_qpos + 0.01 * np.random.normal(size=self.simulator.init_qpos.shape) + self.simulator.model.data.qvel = self.simulator.init_qvel + 0.1 * np.random.normal(size=self.simulator.init_qvel.shape) + self.simulator.model.data.qacc = self.simulator.init_qacc + self.simulator.model.data.ctrl = self.simulator.init_ctrl + + @overrides + def sample_env_init(self): + self.reset_mujoco() + self.simulator.model.forward() + +def get_environment(max_episode_length=100, initPrms={}, obsPrms={}, actPrms={}): + sim = HalfCheetahSimulator() + initObj = InitCheetah(sim, initPrms) + obsObj = CheetahIm(sim, obsPrms) + rewPrms = { 'sim': sim } + rewObj = RewardCheetah(sim, rewPrms) + actObj = ContinuousCheetahAction(actPrms) + env = BaseEnvironment(sim, initObj, obsObj, rewObj, actObj, + params={'max_episode_length':max_episode_length}) + return env diff --git a/envs/move_agent.py b/envs/move_agent.py index 28f1945..e8c78ff 100644 --- a/envs/move_agent.py +++ b/envs/move_agent.py @@ -1,6 +1,8 @@ -from core.base_environment import * +from rlmaster.core.base_environment import * import numpy as np from overrides import overrides +from rllab.mujoco_py import MjModel + from pyhelper_fns import vis_utils def str2action(cmd): @@ -54,7 +56,6 @@ def process(self, action): raise Exception('Action %s not recognized' % action) ctrl = np.array(ctrl).reshape((2,)) return ctrl - class ContinuousAction(BaseContinuousAction): @overrides @@ -65,6 +66,11 @@ def action_dim(self): def process(self, action): return action + def minval(self): + return -1 + + def maxval(self): + return 1 class MoveTeleportSimulator(BaseSimulator): def __init__(self, **kwargs): @@ -79,7 +85,7 @@ def __init__(self, **kwargs): #Manipulate radius self._manipulate_radius = 0.2 #Image size - self._imSz = 64 + self._imSz = 32 self._im = np.zeros((self._imSz, self._imSz, 3), dtype=np.uint8) def 
object_names(self): @@ -148,7 +154,6 @@ def _setup_renderer(self): def render(self): self._canvas._display(self.get_image()) - class InitFixed(BaseInitializer): @overrides def sample_env_init(self): @@ -165,7 +170,6 @@ def sample_env_init(self): self.simulator._pos[k] = range_mag * self.random.rand(2,) + \ self.simulator._range_min - class ObsState(BaseObservation): @overrides def ndim(self): @@ -180,22 +184,21 @@ def observation(self): for i, k in enumerate(self.simulator._pos.keys()): obs[2*i, 2*i + 2] = self.simulator._pos[k].copy() return obs - class ObsIm(BaseObservation): @overrides def ndim(self): dim = {} - dim['im'] = (self.simulator._imSz, self.simulator._imSz, 3) + # dim['im'] = (self.simulator._imSz, self.simulator._imSz, 3) + dim['im'] = (3072, 1) return dim @overrides def observation(self): obs = {} - obs['im'] = self.simulator.get_image() + obs['im'] = self.simulator.get_image().flatten() return obs - class RewardSimple(BaseRewarder): #The radius around the goal in which reward is provided to the agent. 
@property @@ -208,15 +211,15 @@ def get(self): return 1 else: return 0 - def get_environment(initName='InitRandom', obsName='ObsIm', rewName='RewardSimple', - actType='DiscreteActionFour', max_episode_length=100, - initPrms={}, obsPrms={}, rewPrms={}, actPrms={}): - - sim = MoveTeleportSimulator() + actType='DiscreteActionFour', simName='MoveTeleportSimulator', + max_episode_length=100, initPrms={}, obsPrms={}, rewPrms={}, + actPrms={}): + sim = globals()[simName]() initObj = globals()[initName](sim, initPrms) obsObj = globals()[obsName](sim, obsPrms) + rewPrms = { 'sim': sim } rewObj = globals()[rewName](sim, rewPrms) actObj = globals()[actType](actPrms) env = BaseEnvironment(sim, initObj, obsObj, rewObj, actObj, diff --git a/envs/mujoco_envs/xmls/half_cheetah.xml b/envs/mujoco_envs/xmls/half_cheetah.xml new file mode 100644 index 0000000..5f7b25f --- /dev/null +++ b/envs/mujoco_envs/xmls/half_cheetah.xml @@ -0,0 +1,95 @@ + + + + + + + + + + diff --git a/envs/reacher_agent.py b/envs/reacher_agent.py new file mode 100644 index 0000000..26da313 --- /dev/null +++ b/envs/reacher_agent.py @@ -0,0 +1,214 @@ +from rlmaster.core.base_environment import * +import numpy as np +from overrides import overrides +from pyhelper_fns import vis_utils + +def contains(obj1, obj2): + if not obj1 or not obj2: + return False + x1, y1 = obj1.pos[0], obj1.pos[1] + x2, y2 = obj2.pos[0], obj2.pos[1] + s1, s2 = obj1.size, obj2.size + x1, y1 = x1, y1 + x2, y2 = x2, y2 + if x2 + 2 * s2 <= x1 or x1 + 2 * s1 <= x2: + return False + if y2 + 2 * s2 <= y1 or y1 + 2 * s1 <= y2: + return False + return True + +class StackedBox(): + def __init__(self, pos, size=2): + self.pos = pos.reshape((3,)) + self.size = size + +class ReacherArm(): + def __init__(self, pos, size=2): + self.pos = pos.reshape((2,)) + self.size = size + + self.block = None + +class SimpleReacherSimulator(BaseSimulator): + def __init__(self, num_blocks=2, **kwargs): + # should always have same size blocks (width = height = length = 
n)
        super(SimpleReacherSimulator, self).__init__(**kwargs)
        self._imSz = 32
        self.width = self._imSz
        self.height = self._imSz
        # One movable block per num_blocks, a goal box, and the arm.
        self.move_block = [StackedBox(np.zeros(3)) for i in range(num_blocks)]
        self.goal = StackedBox(np.zeros(3))
        self.reacher_arm = ReacherArm(np.zeros(2))

        self._im = np.zeros((self._imSz, self._imSz, 3), dtype=np.uint8)
        # Keep positions far enough from the border that boxes stay on-canvas.
        self._range_min = self.reacher_arm.size
        self._range_max = self._imSz - self.reacher_arm.size

    @property
    def done(self):
        # Episode is done once every movable block sits on the goal
        # (stacking is signalled by pos[2] == goal.size).
        for block in self.move_block:
            if block.pos[2] != self.goal.size:
                return False
        return True

    @overrides
    def step(self, action):
        # Move the arm by (dx, dy); pick up a free block the arm covers and
        # drop the held block once it overlaps the goal.
        action = action.reshape((2,))

        # Evaluated BEFORE the move: drop/pick-up decisions use the pre-step
        # geometry.
        on_goal = contains(self.reacher_arm.block, self.goal)
        on_block = None
        for block in self.move_block:
            if contains(self.reacher_arm, block):
                on_block = block
                break

        # Grab a block under the arm, but never one already on the goal.
        if self.reacher_arm.block is None and on_block is not None and not contains(on_block, self.goal):
            self.reacher_arm.block = on_block

        # Release the held block onto the goal.
        if on_goal:
            self.reacher_arm.block.pos[2] = self.goal.size
            self.reacher_arm.block = None

        self.reacher_arm.pos = np.clip(self.reacher_arm.pos + action, self._range_min, self._range_max)

        # A held block moves with the arm.
        if self.reacher_arm.block is not None:
            block = self.reacher_arm.block
            action = np.append(action, 0)
            block.pos = np.clip(block.pos + action, self._range_min, self._range_max)
            # NOTE(review): z parked at _imSz while carried — presumably a
            # "held" marker (done/reward test z == goal.size); confirm.
            block.pos[2] = self._imSz

    def _plot_object(self, coords, box, color='r'):
        # Fill an axis-aligned rectangle of half-extents `box` centred at
        # `coords` into the chosen color channel, clipped to the canvas.
        x, y = coords
        mnx, mxx = int(max(x - box[0], 0)), int(min(self._imSz, x + box[0]))
        mny, mxy = int(max(y - box[1], 0)), int(min(self._imSz, y + box[1]))
        if color == 'r':
            self._im[mny:mxy, mnx:mxx, 0] = 255
        elif color == 'g':
            self._im[mny:mxy, mnx:mxx, 1] = 255
        else:
            self._im[mny:mxy, mnx:mxx, 2] = 255

    @overrides
    def get_image(self):
        # other blocks in red, active block in green
        imSz = self._imSz
        self._im = np.zeros((imSz, imSz, 3), dtype=np.uint8)
        x, y = self.goal.pos[0], self.goal.pos[1]
        self._plot_object((x, y), (2, 2), 'r')
        for block in self.move_block:
            x, y = block.pos[0], block.pos[1]
            self._plot_object((x, y), (2, 2), 'g')
        x, y = self.reacher_arm.pos[0], self.reacher_arm.pos[1]
        self._plot_object((x, y), (2, 2), 'b')
        return self._im.copy()

    @overrides
    def _setup_renderer(self):
        self._canvas = vis_utils.MyAnimation(None, height=self._imSz, width=self._imSz)

    @overrides
    def render(self):
        # Lazily create the canvas so render() works without explicit setup.
        if not hasattr(self, '_canvas'):
            self._setup_renderer()
        self._canvas._display(self.get_image())


class StackerIm(BaseObservation):
    # Raw (H, W, 3) uint8 image observation.

    @overrides
    def ndim(self):
        dim = {}
        dim['im'] = (self.simulator._imSz, self.simulator._imSz, 3)
        return dim

    @overrides
    def observation(self):
        obs = {}
        obs['im'] = self.simulator.get_image()
        return obs


class StateIm(BaseObservation):
    # Flat state vector: scaled goal pos, scaled arm pos, a held-block flag,
    # then each block's scaled position.

    @overrides
    def ndim(self):
        dim = {}
        num_blocks = len(self.simulator.move_block)
        dim['im'] = (num_blocks * 3 + 6, 1)
        return dim

    def scale(self, obs):
        # Map pixel coordinates [0, _imSz] into [-1, 1].
        obs = np.copy(obs)
        obs = obs / float(self.simulator._imSz) * 2 - 1
        return obs

    @overrides
    def observation(self):
        obs = {}

        m_block = self.simulator.move_block
        g_pos = self.simulator.goal.pos
        r_pos = self.simulator.reacher_arm.pos
        new_im = np.append(self.scale(g_pos), self.scale(r_pos))
        new_im = np.append(new_im, np.array([1 if self.simulator.reacher_arm.block is not None else 0]))
        for b in m_block:
            new_im = np.append(new_im, self.scale(b.pos))
        obs['im'] = new_im
        return obs

####@pulkitag: This is good.
+class RewardReacher(BaseRewarder): + + @property + def contains_block(self): + if hasattr(self.prms['sim'], 'move_block') and hasattr(self.prms['sim'], 'goal'): + move_block = self.prms['sim'].move_block + goal = self.prms['sim'].goal + reward = 1 if all([block.pos[2] == goal.size for block in move_block]) else 0 + return reward + return 0 + + @overrides + def get(self): + return self.contains_block + +# need additional actions: to pick up and to drop the block +class ContinuousReacherAction(BaseContinuousAction): + @overrides + def action_dim(self): + return 2 + + @overrides + def process(self, action): + return action + + def minval(self): + return -1 + + def maxval(self): + return 1 + +class InitReacher(BaseInitializer): + @overrides + def sample_env_init(self): + sim = self.simulator['sim'] + sim.reacher_arm.pos = np.random.randint(sim._range_min, sim._range_max, size=2) + sim.reacher_arm.block = None + + sim.goal.pos = np.random.randint(sim._range_min, sim._range_max, size=3) + sim.goal.pos[2] = 0 + for block in sim.move_block: + block.pos = np.random.randint(sim._range_min, sim._range_max, size=3) + block.pos[2] = 0 + if contains(sim.goal, block): + block.pos[2] = sim.goal.size + +def get_environment(obsType='StateIm', max_episode_length=100, initPrms={}, obsPrms={}, rewPrms={}, actPrms={}): + sim = SimpleReacherSimulator() + initPrms = {'sim' : sim} + initObj = InitReacher(initPrms) + obsObj = globals()[obsType](sim, obsPrms) + rewPrms = { 'sim': sim } + rewObj = RewardReacher(sim, rewPrms) + actObj = ContinuousReacherAction(actPrms) + env = BaseEnvironment(sim, initObj, obsObj, rewObj, actObj, + params={'max_episode_length':max_episode_length}) + return env diff --git a/envs/stacker_agent.py b/envs/stacker_agent.py new file mode 100644 index 0000000..d8b81ce --- /dev/null +++ b/envs/stacker_agent.py @@ -0,0 +1,168 @@ +from rlmaster.core.base_environment import * +import numpy as np +from overrides import overrides +from pyhelper_fns import vis_utils + +def 
contains(obj1, obj2): + x1, y1 = obj1.pos[0], obj1.pos[1] + x2, y2 = obj2.pos[0], obj2.pos[1] + s1, s2 = obj1.size, obj2.size + x1, y1 = x1 - s1, y1 - s1 + x2, y2 = x2 - s2, y2 - s2 + if x2 + s2 <= x1 or x1 + s1 <= x2: + return False + if y2 + s2 <= y1 or y1 + s1 <= y2: + return False + return True + +class StackedBox(): + def __init__(self, pos, size=2): + # position is center of the box + self.pos = pos.reshape((3,)) + self.size = size + +class SimpleStackerSimulator(BaseSimulator): + def __init__(self, **kwargs): + super(SimpleStackerSimulator, self).__init__(**kwargs) + self._imSz = 32 + self.width = self._imSz + self.height = self._imSz + self.move_block = StackedBox(np.zeros(3)) + self.other_block = StackedBox(np.zeros(3)) + + self._im = np.zeros((self._imSz, self._imSz, 3), dtype=np.uint8) + self._range_min = self.move_block.size + self._range_max = self._imSz - self.move_block.size + + @overrides + def step(self, pos): + #####@pulkitag: You should make use of BaseDiscreteAction and BaseContinuousAction + #### to input the actions. 
+ pos = pos.reshape((2,)) + pos = np.append(pos, np.zeros(1)) + + new_pos = np.clip(self.move_block.pos + pos, self._range_min, self._range_max) + new_pos[2] = 0 + self.move_block.pos = new_pos + if contains(self.move_block, self.other_block): + self.move_block.pos[2] = self.other_block.size + + def _plot_object(self, coords, color='r'): + x, y = coords + mnx, mxx = max(x - self.move_block.size, 0), min(self._imSz, x + self.move_block.size) + mny, mxy = max(y - self.move_block.size, 0), min(self._imSz, y + self.move_block.size) + if color == 'r': + self._im[mny:mxy, mnx:mxx, 0] = 255 + elif color == 'g': + self._im[mny:mxy, mnx:mxx, 1] = 255 + else: + self._im[mny:mxy, mnx:mxx, 2] = 255 + + @overrides + def get_image(self): + imSz = self._imSz + rng = np.linspace(self._range_min, self._range_max, imSz) + x_1, y_1 = self.other_block.pos[0], self.other_block.pos[1] + x_2, y_2 = self.move_block.pos[0], self.move_block.pos[1] + self._im = np.zeros((imSz, imSz, 3), dtype=np.uint8) + self._plot_object((x_1, y_1), 'r') + self._plot_object((x_2, y_2), 'g') + return self._im.copy() + + @overrides + def _setup_renderer(self): + self._canvas = vis_utils.MyAnimation(None, height=self._imSz, width=self._imSz) + + @overrides + def render(self): + self._canvas._display(self.get_image()) + +class StackerIm(BaseObservation): + + @overrides + def ndim(self): + dim = {} + dim['im'] = (self.simulator._imSz, self.simulator._imSz, 3) + return dim + + @overrides + def observation(self): + obs = {} + obs['im'] = self.simulator.get_image() + return obs + +class StateIm(BaseObservation): + + @overrides + def ndim(self): + dim = {} + dim['im'] = (6, 1) + return dim + + def scale(self, obs): + norm = np.linalg.norm(obs) + obs = np.copy(obs) + obs = obs / 16 - 1 + return obs + + @overrides + def observation(self): + obs = {} + m_pos = self.simulator.move_block.pos + o_pos = self.simulator.other_block.pos + new_im = np.append(self.scale(m_pos), self.scale(o_pos)) + obs['im'] = new_im + return 
obs + + +####@pulkitag: This is good. +class RewardStacker(BaseRewarder): + + @property + def block_height(self): + if hasattr(self.prms['sim'], 'other_block') and hasattr(self.prms['sim'], 'move_block'): + return 1 if self.prms['sim'].move_block.pos[2] == self.prms['sim'].other_block.size else 0 + return 0 + + @overrides + def get(self): + return self.block_height + +class ContinuousStackerAction(BaseContinuousAction): + @overrides + def action_dim(self): + return 2 + + @overrides + def process(self, action): + return action + + def minval(self): + return -1 + + def maxval(self): + return 1 + +class InitStacker(BaseInitializer): + @overrides + def sample_env_init(self): + sim = self.simulator['sim'] + size = sim.move_block.size + sim.move_block.pos = np.random.randint(sim._range_min, sim._range_max, size=3) + sim.move_block.pos[2] = 0 + sim.other_block.pos = np.random.randint(sim._range_min, sim._range_max, size=3) + sim.other_block.pos[2] = 0 + if contains(sim.move_block, sim.other_block): + sim.move_block.pos[2] = size + +def get_environment(obsType='StateIm', max_episode_length=100, initPrms={}, obsPrms={}, rewPrms={}, actPrms={}): + sim = SimpleStackerSimulator() + rewPrms = { 'sim': sim } + initPrms = { 'sim': sim } + initObj = InitStacker(initPrms) + obsObj = globals()[obsType](sim, obsPrms) + rewObj = RewardStacker(sim, rewPrms) + actObj = ContinuousStackerAction(actPrms) + env = BaseEnvironment(sim, initObj, obsObj, rewObj, actObj, + params={'max_episode_length':max_episode_length}) + return env