From c2ef0b65f9013f69b8deb93759f69e9c27568ef9 Mon Sep 17 00:00:00 2001 From: JasmineDeng Date: Mon, 20 Mar 2017 18:19:29 -0700 Subject: [PATCH 01/10] implemented cheetah --- core/gym_wrapper.py | 19 +++--- envs/move_agent.py | 77 +++++++++++++++++++-- envs/mujoco_envs/xmls/half_cheetah.xml | 95 ++++++++++++++++++++++++++ 3 files changed, 176 insertions(+), 15 deletions(-) create mode 100644 envs/mujoco_envs/xmls/half_cheetah.xml diff --git a/core/gym_wrapper.py b/core/gym_wrapper.py index 11d38b6..dd9d9d6 100644 --- a/core/gym_wrapper.py +++ b/core/gym_wrapper.py @@ -1,6 +1,10 @@ import numpy as np -from core import base_environment +import rlmaster.core.base_environment as base_environment from gym import spaces +from rllab.spaces.box import Box +from rllab.core.serializable import Serializable +from rllab.misc.overrides import overrides +from rllab.misc import logger class GymWrapper(object): def __init__(self, env): @@ -11,16 +15,15 @@ def __init__(self, env): else: assert isinstance(self.env.action_processor, base_environment.BaseContinuousAction) - self.action_space = spaces.Box( + self.action_space = Box( low = env.action_processor.minval(), high = env.action_processor.maxval(), - shape=(env.action_processor.action_dim(), - env.action_processor.num_actions())) + shape=(env.action_processor.action_dim())) obsNdim = self.env.observation_ndim() - obsKeys = obsNdim.keys() + obsKeys = list(obsNdim.keys()) assert len(obsKeys) == 1, 'gym only supports one observation type' self._obsKey = obsKeys[0] - self.observation_space = spaces.Box(low=0, high=255, shape=obsNdim[obsKeys[0]]) + self.observation_space = Box(low=0, high=255, shape=obsNdim[obsKeys[0]]) @property def frameskip(self): @@ -68,7 +71,7 @@ def step(self, action): def viewer_setup(self): - self.env._renderer_setup() + self.env.setup_renderer() def render(self): @@ -76,5 +79,5 @@ def render(self): def _render(self): + print('i am rendering') return self.env.render() - diff --git a/envs/move_agent.py b/envs/move_agent.py index 28f1945..01811cb 100644 --- a/envs/move_agent.py +++ b/envs/move_agent.py @@ -1,6 +1,8 @@ -from core.base_environment import * +from rlmaster.core.base_environment import * import numpy as np from overrides import overrides +from rllab.mujoco_py import MjModel + from pyhelper_fns import vis_utils def str2action(cmd): @@ -54,7 +56,6 @@ def process(self, action): raise Exception('Action %s not recognized' % action) ctrl = np.array(ctrl).reshape((2,)) return ctrl - class ContinuousAction(BaseContinuousAction): @overrides @@ -65,6 +66,11 @@ def action_dim(self): def process(self, action): return action + def minval(self): + return -1 + + def maxval(self): + return 1 class MoveTeleportSimulator(BaseSimulator): def __init__(self, **kwargs): @@ -79,7 +85,7 @@ def __init__(self, **kwargs): #Manipulate radius self._manipulate_radius = 0.2 #Image size - self._imSz = 64 + self._imSz = 32 self._im = np.zeros((self._imSz, self._imSz, 3), dtype=np.uint8) def object_names(self): @@ -148,6 +154,39 @@ def _setup_renderer(self): def render(self): self._canvas._display(self.get_image()) +class HalfCheetahSimulator(MoveTeleportSimulator): + def __init__(self, **kwargs): + super(HalfCheetahSimulator, self).__init__(**kwargs) + self.model = MjModel('rlmaster/envs/mujoco_envs/xmls/half_cheetah.xml') + self._pos = {} + self._pos['torso'] = np.zeros((3,)) + self._range_min = -1 + self._range_max = 1 + + self._imSz = 32 + self._im = np.zeros((self._imSz, self._imSz, 3), dtype=np.uint8) + self.body_comvel = 0 + self.action = np.zeros((1, 2)) + + @overrides + def step(self, ctrl): + self.model.data.ctrl = ctrl + for i in range(10): + self.model.step() + self.model.forward() + ind = self.model.body_names.index('torso') + self._pos['torso'] = self.model.body_pos[ind] + self.body_comvel = self.model.body_comvels[ind] + self.action = ctrl + + @overrides + def get_image(self): + imSz = self._imSz + rng = np.linspace(self._range_min, self._range_max, imSz) + g_x, g_y = self._get_bin(rng, self._pos['torso']) + self._im = np.zeros((imSz, imSz, 3), dtype=np.uint8) + self._plot_object((g_x, g_y), 'r') + return self._im.copy() class InitFixed(BaseInitializer): @overrides @@ -195,6 +234,13 @@ def observation(self): obs['im'] = self.simulator.get_image() return obs +class CheetahIm(ObsIm): + @overrides + def observation(self): + obs = {} + obs['im'] = self.simulator.get_image().flatten() + return obs + class RewardSimple(BaseRewarder): #The radius around the goal in which reward is provided to the agent. @@ -208,15 +254,32 @@ def get(self): return 1 else: return 0 + +class RewardCheetah(BaseRewarder): + + @property + def action(self): + return self.prms['sim'].action if hasattr(self.prms['sim'], 'action') else np.zeros((1,2)) + + @property + def body_comvel(self): + return self.prms['sim'].body_comvel if hasattr(self.prms['sim'], 'body_comvel') else 0 + + @overrides + def get(self): + ctrl_cost = 1e-1 * 0.5 * np.sum(np.square(self.action)) + run_cost = -1 * self.body_comvel[0] + return ctrl_cost + run_cost def get_environment(initName='InitRandom', obsName='ObsIm', rewName='RewardSimple', - actType='DiscreteActionFour', max_episode_length=100, - initPrms={}, obsPrms={}, rewPrms={}, actPrms={}): - - sim = MoveTeleportSimulator() + actType='DiscreteActionFour', simName='MoveTeleportSimulator', + max_episode_length=100, initPrms={}, obsPrms={}, rewPrms={}, + actPrms={}): + sim = globals()[simName]() initObj = globals()[initName](sim, initPrms) obsObj = globals()[obsName](sim, obsPrms) + rewPrms = { 'sim': sim } rewObj = globals()[rewName](sim, rewPrms) actObj = globals()[actType](actPrms) env = BaseEnvironment(sim, initObj, obsObj, rewObj, actObj, diff --git a/envs/mujoco_envs/xmls/half_cheetah.xml b/envs/mujoco_envs/xmls/half_cheetah.xml new file mode 100644 index 0000000..5f7b25f --- /dev/null +++ b/envs/mujoco_envs/xmls/half_cheetah.xml @@ -0,0 +1,95 @@ + + + + + + + + + + From c032a98fe5b0d668b1266552783fb2b07b05cd62 Mon Sep 17 00:00:00 2001 From: JasmineDeng Date: Thu, 23 Mar 2017 18:00:10 -0700 Subject: [PATCH 02/10] moved cheetah and stacker to new files, created stacker environment --- envs/cheetah_agent.py | 59 ++++++++++++++++++++++++++++++ envs/move_agent.py | 61 ++----------------------------- envs/stacker_agent.py | 84 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 145 insertions(+), 59 deletions(-) create mode 100644 envs/cheetah_agent.py create mode 100644 envs/stacker_agent.py diff --git a/envs/cheetah_agent.py b/envs/cheetah_agent.py new file mode 100644 index 0000000..42a7805 --- /dev/null +++ b/envs/cheetah_agent.py @@ -0,0 +1,59 @@ +from move_agent import MoveTeleportSimulator, ObsIm, BaseRewarder + +class HalfCheetahSimulator(MoveTeleportSimulator): + def __init__(self, **kwargs): + super(HalfCheetahSimulator, self).__init__(**kwargs) + self.model = MjModel('../rlmaster/envs/mujoco_envs/xmls/half_cheetah.xml') + self._pos = {} + self._pos['torso'] = np.zeros((3,)) + self._range_min = -1 + self._range_max = 1 + + self._imSz = 32 + self._im = np.zeros((self._imSz, self._imSz, 3), dtype=np.uint8) + self.body_comvel = 0 + self.action = np.zeros((1, 2)) + + @overrides + def step(self, ctrl): + self.model.data.ctrl = ctrl + for i in range(10): + self.model.step() + self.model.forward() + ind = self.model.body_names.index('torso') + self._pos['torso'] = self.model.body_pos[ind] + self.body_comvel = self.model.body_comvels[ind] + self.action = ctrl + + @overrides + def get_image(self): + imSz = self._imSz + rng = np.linspace(self._range_min, self._range_max, imSz) + g_x, g_y = self._get_bin(rng, self._pos['torso']) + self._im = np.zeros((imSz, imSz, 3), dtype=np.uint8) + self._plot_object((g_x, g_y), 'r') + return self._im.copy() + +class CheetahIm(ObsIm): + @overrides + def observation(self): + obs = {} + obs['im'] = self.simulator.get_image().flatten() + return obs + +class RewardCheetah(BaseRewarder): + + @property + def action(self): + return self.prms['sim'].action if hasattr(self.prms['sim'], 'action') else np.zeros((1,2)) + + @property + def body_comvel(self): + return self.prms['sim'].body_comvel if hasattr(self.prms['sim'], 'body_comvel') else 0 + + @overrides + def get(self): + ctrl_cost = 1e-1 * 0.5 * np.sum(np.square(self.action)) + run_cost = -1 * self.body_comvel[0] + return ctrl_cost + run_cost + \ No newline at end of file diff --git a/envs/move_agent.py b/envs/move_agent.py index 01811cb..3f1c725 100644 --- a/envs/move_agent.py +++ b/envs/move_agent.py @@ -3,6 +3,8 @@ from overrides import overrides from rllab.mujoco_py import MjModel +from cheetah_agent import HalfCheetahSimulator, CheetahIm, RewardCheetah +from stacker_agent import SimpleStackerSimulator, StackerIm, RewardStacker from pyhelper_fns import vis_utils def str2action(cmd): @@ -154,40 +156,6 @@ def _setup_renderer(self): def render(self): self._canvas._display(self.get_image()) -class HalfCheetahSimulator(MoveTeleportSimulator): - def __init__(self, **kwargs): - super(HalfCheetahSimulator, self).__init__(**kwargs) - self.model = MjModel('rlmaster/envs/mujoco_envs/xmls/half_cheetah.xml') - self._pos = {} - self._pos['torso'] = np.zeros((3,)) - self._range_min = -1 - self._range_max = 1 - - self._imSz = 32 - self._im = np.zeros((self._imSz, self._imSz, 3), dtype=np.uint8) - self.body_comvel = 0 - self.action = np.zeros((1, 2)) - - @overrides - def step(self, ctrl): - self.model.data.ctrl = ctrl - for i in range(10): - self.model.step() - self.model.forward() - ind = self.model.body_names.index('torso') - self._pos['torso'] = self.model.body_pos[ind] - self.body_comvel = self.model.body_comvels[ind] - self.action = ctrl - - @overrides - def get_image(self): - imSz = self._imSz - rng = np.linspace(self._range_min, self._range_max, imSz) - g_x, g_y = self._get_bin(rng, self._pos['torso']) - self._im = np.zeros((imSz, imSz, 3), dtype=np.uint8) - self._plot_object((g_x, g_y), 'r') - return self._im.copy() - class InitFixed(BaseInitializer): @overrides def sample_env_init(self): @@ -204,7 +172,6 @@ def sample_env_init(self): self.simulator._pos[k] = range_mag * self.random.rand(2,) + \ self.simulator._range_min - class ObsState(BaseObservation): @overrides def ndim(self): @@ -234,14 +201,6 @@ def observation(self): obs['im'] = self.simulator.get_image() return obs -class CheetahIm(ObsIm): - @overrides - def observation(self): - obs = {} - obs['im'] = self.simulator.get_image().flatten() - return obs - - class RewardSimple(BaseRewarder): #The radius around the goal in which reward is provided to the agent. @property @@ -255,22 +214,6 @@ def get(self): else: return 0 -class RewardCheetah(BaseRewarder): - - @property - def action(self): - return self.prms['sim'].action if hasattr(self.prms['sim'], 'action') else np.zeros((1,2)) - - @property - def body_comvel(self): - return self.prms['sim'].body_comvel if hasattr(self.prms['sim'], 'body_comvel') else 0 - - @overrides - def get(self): - ctrl_cost = 1e-1 * 0.5 * np.sum(np.square(self.action)) - run_cost = -1 * self.body_comvel[0] - return ctrl_cost + run_cost - def get_environment(initName='InitRandom', obsName='ObsIm', rewName='RewardSimple', actType='DiscreteActionFour', simName='MoveTeleportSimulator', diff --git a/envs/stacker_agent.py b/envs/stacker_agent.py new file mode 100644 index 0000000..fd00822 --- /dev/null +++ b/envs/stacker_agent.py @@ -0,0 +1,84 @@ +from move_agent import MoveTeleportSimulator, ObsIm, BaseRewarder + +class StackedBox(): + + def __init__(self, pos, width=3, height=3, length=3): + assert len(pos) == 3, 'must enter an xyz position' + + self.pos = pos + self.width = width + self.height = height + self.length = length + + def contains(self, new_pos): + x, y, z = self.pos + new_x, new_y, new_z = new_pos + return new_x >= x and new_x <= x + self.width \ + and new_y >= y and new_y <= y + self.height and new_z >= z and new_z <= z + self.length + +class SimpleStackerSimulator(MoveTeleportSimulator): + def __init__(self, **kwargs): + super(SimpleStackerSimulator, self).__init__(**kwargs) + self.width = self._imSz + self.height = self._imSz + self.block1 = StackedBox(np.random.randint(0, 32 - 3, size=3)) + self.block1.pos[2] = 0 + self.block2 = StackedBox(np.random.randint(0, 32 - 3, size=3)) + self.block2.pos[2] = 0 + if self.block1.contains(self.block2.pos): + self.block2.pos[2] = self.block1.height + + self._imSz = 32 + self._im = np.zeros((self._imSz, self._imSz, 3), dtype=np.uint8) + self._range_min = 0 + self._range_max = 32 - self.block1.width + + @overrides + def step(self, pos): + assert len(pos) == 2, 'step takes in an xy position' + + x, y = min(pos[0], 32 - 3), min(pos[1], 32 - 3) + self.block2.pos = np.array([x, y, 0]) + if self.block1.contains(self.block2.pos): + self.block2.pos[2] = self.block1.height + + @overrides + def _plot_object(self, coords, color='r'): + x, y = coords + mnx, mxx = x, min(self._imSz, x + self.block1.width) + mny, mxy = y, min(self._imSz, y + self.block1.height) + if color == 'r': + self._im[mny:mxy, mnx:mxx, 0] = 255 + elif color == 'g': + self._im[mny:mxy, mnx:mxx, 1] = 255 + else: + self._im[mny:mxy, mnx:mxx, 2] = 255 + + @overrides + def get_image(self): + imSz = self._imSz + rng = np.linspace(self._range_min, self._range_max, imSz) + x_1, y_1 = self.block1.pos[0], self.block1.pos[1] + x_2, y_2 = self.block2.pos[0], self.block2.pos[1] + self._im = np.zeros((imSz, imSz, 3), dtype=np.uint8) + self._plot_object((x_1, y_1), 'r') + self._plot_object((x_2, y_2), 'g') + return self._im.copy() + +class StackerIm(ObsIm): + + @overrides + def observation(self): + obs = {} + obs['im'] = self.simulator.get_image().flatten() + return obs + +class RewardStacker(BaseRewarder): + + @property + def block_height(self): + return self.prms['sim'].block2.pos[2] if hasattr(self.prms['sim'], 'block2') else 0 + + @overrides + def get(self): + return self.block_height From 8e7298f91435e025acd3278f2156f258d6d46a67 Mon Sep 17 00:00:00 2001 From: JasmineDeng Date: Fri, 24 Mar 2017 11:29:44 -0700 Subject: [PATCH 03/10] allowed rendering for stacker agent --- envs/move_agent.py | 144 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 142 insertions(+), 2 deletions(-) diff --git a/envs/move_agent.py b/envs/move_agent.py index 3f1c725..a016e9e 100644 --- a/envs/move_agent.py +++ b/envs/move_agent.py @@ -3,8 +3,6 @@ from overrides import overrides from rllab.mujoco_py import MjModel -from cheetah_agent import HalfCheetahSimulator, CheetahIm, RewardCheetah -from stacker_agent import SimpleStackerSimulator, StackerIm, RewardStacker from pyhelper_fns import vis_utils def str2action(cmd): @@ -214,6 +212,148 @@ def get(self): else: return 0 +class StackedBox(): + + def __init__(self, pos, width=3, height=3, length=3): + assert len(pos) == 3, 'must enter an xyz position' + + self.pos = pos + self.width = width + self.height = height + self.length = length + + def contains(self, new_pos): + x, y, z = self.pos + new_x, new_y, new_z = new_pos + return new_x >= x and new_x <= x + self.width \ + and new_y >= y and new_y <= y + self.height and new_z >= z and new_z <= z + self.length + +class SimpleStackerSimulator(MoveTeleportSimulator): + def __init__(self, **kwargs): + super(SimpleStackerSimulator, self).__init__(**kwargs) + self.width = self._imSz + self.height = self._imSz + self.block1 = StackedBox(np.random.randint(0, 32 - 3, size=3)) + self.block1.pos[2] = 0 + self.block2 = StackedBox(np.random.randint(0, 32 - 3, size=3)) + self.block2.pos[2] = 0 + if self.block1.contains(self.block2.pos): + self.block2.pos[2] = self.block1.height + + self._imSz = 32 + self._im = np.zeros((self._imSz, self._imSz, 3), dtype=np.uint8) + self._range_min = 0 + self._range_max = 32 - self.block1.width + + @overrides + def step(self, pos): + assert len(pos) == 2, 'step takes in an xy position' + + x, y = min(pos[0], 32 - 3), min(pos[1], 32 - 3) + self.block2.pos = np.array([x, y, 0]) + if self.block1.contains(self.block2.pos): + self.block2.pos[2] = self.block1.height + + @overrides + def _plot_object(self, coords, color='r'): + x, y = coords + mnx, mxx = x, min(self._imSz, x + self.block1.width) + mny, mxy = y, min(self._imSz, y + self.block1.height) + if color == 'r': + self._im[mny:mxy, mnx:mxx, 0] = 255 + elif color == 'g': + self._im[mny:mxy, mnx:mxx, 1] = 255 + else: + self._im[mny:mxy, mnx:mxx, 2] = 255 + + @overrides + def get_image(self): + imSz = self._imSz + rng = np.linspace(self._range_min, self._range_max, imSz) + x_1, y_1 = self.block1.pos[0], self.block1.pos[1] + x_2, y_2 = self.block2.pos[0], self.block2.pos[1] + self._im = np.zeros((imSz, imSz, 3), dtype=np.uint8) + self._plot_object((x_1, y_1), 'r') + self._plot_object((x_2, y_2), 'g') + return self._im.copy() + +class StackerIm(ObsIm): + + @overrides + def observation(self): + obs = {} + obs['im'] = self.simulator.get_image().flatten() + return obs + +class RewardStacker(BaseRewarder): + + @property + def block_height(self): + return self.prms['sim'].block2.pos[2] if hasattr(self.prms['sim'], 'block2') else 0 + + @overrides + def get(self): + return self.block_height + +from rlmaster.envs.move_agent import MoveTeleportSimulator, ObsIm, BaseRewarder + +class HalfCheetahSimulator(MoveTeleportSimulator): + def __init__(self, **kwargs): + super(HalfCheetahSimulator, self).__init__(**kwargs) + self.model = MjModel('../rlmaster/envs/mujoco_envs/xmls/half_cheetah.xml') + self._pos = {} + self._pos['torso'] = np.zeros((3,)) + self._range_min = -1 + self._range_max = 1 + + self._imSz = 32 + self._im = np.zeros((self._imSz, self._imSz, 3), dtype=np.uint8) + self.body_comvel = 0 + self.action = np.zeros((1, 2)) + + @overrides + def step(self, ctrl): + self.model.data.ctrl = ctrl + for i in range(10): + self.model.step() + self.model.forward() + ind = self.model.body_names.index('torso') + self._pos['torso'] = self.model.body_pos[ind] + self.body_comvel = self.model.body_comvels[ind] + self.action = ctrl + + @overrides + def get_image(self): + imSz = self._imSz + rng = np.linspace(self._range_min, self._range_max, imSz) + g_x, g_y = self._get_bin(rng, self._pos['torso']) + self._im = np.zeros((imSz, imSz, 3), dtype=np.uint8) + self._plot_object((g_x, g_y), 'r') + return self._im.copy() + +class CheetahIm(ObsIm): + @overrides + def observation(self): + obs = {} + obs['im'] = self.simulator.get_image().flatten() + return obs + +class RewardCheetah(BaseRewarder): + + @property + def action(self): + return self.prms['sim'].action if hasattr(self.prms['sim'], 'action') else np.zeros((1,2)) + + @property + def body_comvel(self): + return self.prms['sim'].body_comvel if hasattr(self.prms['sim'], 'body_comvel') else 0 + + @overrides + def get(self): + ctrl_cost = 1e-1 * 0.5 * np.sum(np.square(self.action)) + run_cost = -1 * self.body_comvel[0] + return ctrl_cost + run_cost + def get_environment(initName='InitRandom', obsName='ObsIm', rewName='RewardSimple', actType='DiscreteActionFour', simName='MoveTeleportSimulator', From 89567ea544a1c67db03c52d8d65f81b61d739863 Mon Sep 17 00:00:00 2001 From: Pulkit Date: Fri, 24 Mar 2017 12:27:37 -0700 Subject: [PATCH 04/10] Issues in code structure --- envs/stacker_agent.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/envs/stacker_agent.py b/envs/stacker_agent.py index fd00822..aa2f94d 100644 --- a/envs/stacker_agent.py +++ b/envs/stacker_agent.py @@ -16,6 +16,8 @@ def contains(self, new_pos): return new_x >= x and new_x <= x + self.width \ and new_y >= y and new_y <= y + self.height and new_z >= z and new_z <= z + self.length + +### @pulkitag: This should inherit from BaseSimulator. Any reason for this? class SimpleStackerSimulator(MoveTeleportSimulator): def __init__(self, **kwargs): super(SimpleStackerSimulator, self).__init__(**kwargs) @@ -35,6 +37,8 @@ def __init__(self, **kwargs): @overrides def step(self, pos): + #####@pulkitag: You should make use of BaseDiscreteAction and BaseContinuousAction + #### to input the actions. assert len(pos) == 2, 'step takes in an xy position' x, y = min(pos[0], 32 - 3), min(pos[1], 32 - 3) @@ -65,14 +69,15 @@ def get_image(self): self._plot_object((x_2, y_2), 'g') return self._im.copy() -class StackerIm(ObsIm): - +####@pulkitag: Again, why are you inherting from ObsIm? +class StackerIm(ObsIm): @overrides def observation(self): obs = {} obs['im'] = self.simulator.get_image().flatten() return obs +####@pulkitag: This is good. class RewardStacker(BaseRewarder): @property @@ -82,3 +87,6 @@ def block_height(self): @overrides def get(self): return self.block_height + +###@pulkitag: See get_environment in move_agent - you should have a similar function +### so that it easy to instatiate a basic version of the environment. From 11958d75ba6c55f9e462dee7591677bbd9eac753 Mon Sep 17 00:00:00 2001 From: JasmineDeng Date: Fri, 24 Mar 2017 18:04:13 -0700 Subject: [PATCH 05/10] made cheetah and stacker into separate modules, fixed cheetah rendering --- core/gym_wrapper.py | 1 - envs/cheetah_agent.py | 171 +++++++++++++++++++++++++++++------------- envs/move_agent.py | 143 ----------------------------------- envs/stacker_agent.py | 76 +++++++++++++++---- 4 files changed, 179 insertions(+), 212 deletions(-) diff --git a/core/gym_wrapper.py b/core/gym_wrapper.py index dd9d9d6..ebde5a8 100644 --- a/core/gym_wrapper.py +++ b/core/gym_wrapper.py @@ -79,5 +79,4 @@ def render(self): def _render(self): - print('i am rendering') return self.env.render() diff --git a/envs/cheetah_agent.py b/envs/cheetah_agent.py index 42a7805..6ba9724 100644 --- a/envs/cheetah_agent.py +++ b/envs/cheetah_agent.py @@ -1,59 +1,122 @@ -from move_agent import MoveTeleportSimulator, ObsIm, BaseRewarder - -class HalfCheetahSimulator(MoveTeleportSimulator): - def __init__(self, **kwargs): - super(HalfCheetahSimulator, self).__init__(**kwargs) - self.model = MjModel('../rlmaster/envs/mujoco_envs/xmls/half_cheetah.xml') - self._pos = {} - self._pos['torso'] = np.zeros((3,)) - self._range_min = -1 - self._range_max = 1 - - self._imSz = 32 - self._im = np.zeros((self._imSz, self._imSz, 3), dtype=np.uint8) - self.body_comvel = 0 - self.action = np.zeros((1, 2)) - - @overrides - def step(self, ctrl): - self.model.data.ctrl = ctrl - for i in range(10): - self.model.step() - self.model.forward() - ind = self.model.body_names.index('torso') - self._pos['torso'] = self.model.body_pos[ind] - self.body_comvel = self.model.body_comvels[ind] - self.action = ctrl - - @overrides - def get_image(self): - imSz = self._imSz - rng = np.linspace(self._range_min, self._range_max, imSz) - g_x, g_y = self._get_bin(rng, self._pos['torso']) - self._im = np.zeros((imSz, imSz, 3), dtype=np.uint8) - self._plot_object((g_x, g_y), 'r') - return self._im.copy() - -class CheetahIm(ObsIm): - @overrides - def observation(self): - obs = {} - obs['im'] = self.simulator.get_image().flatten() - return obs +from rlmaster.core.base_environment import * +import numpy as np +from overrides import overrides +from rllab.mujoco_py import MjModel, MjViewer + +class HalfCheetahSimulator(BaseSimulator): + def __init__(self, **kwargs): + super(HalfCheetahSimulator, self).__init__(**kwargs) + + self._imSz = 512 + self._im = np.zeros((self._imSz, self._imSz, 3), dtype=np.uint8) + + self.model = MjModel('../rlmaster/envs/mujoco_envs/xmls/half_cheetah.xml') + self.viewer = None + self._pos = {} + self._pos['torso'] = np.zeros((3,)) + self._range_min = -1 + self._range_max = 1 + + self.body_comvel = 0 + self.action = np.zeros((1, 2)) + + self.frame_skip = 1 + + @overrides + def step(self, ctrl, loop=False): + self.model.data.ctrl = ctrl + np.random.normal(size=ctrl.shape) + for i in range(self.frame_skip): + self.model.step() + self.model.forward() + ind = self.model.body_names.index('torso') + self._pos['torso'] = self.model.body_pos[ind] + self.body_comvel = self.model.body_comvels[ind] + self.action = ctrl + + @overrides + def get_image(self): + data, width, height = self.viewer.get_image() + self._im = np.fromstring(data, dtype='uint8').reshape(height, width, 3)[::-1,:,:] + print(self._im) + return self._im.copy() + + @overrides + def _setup_renderer(self): + self.viewer = MjViewer(visible=True, init_width=self._imSz, init_height=self._imSz) + self.viewer.start() + self.viewer.set_model(self.model) + + @overrides + def render(self): + self.viewer.loop_once() + +class CheetahIm(BaseObservation): + + def get_body_com(self, body_name): + idx = self.simulator.model.body_names.index(body_name) + return self.simulator.model.data.com_subtree[idx] + + @overrides + def ndim(self): + dim = {} + dim['im'] = (1, 20) + return dim + + @overrides + def observation(self): + obs = {} + current_obs = np.concatenate([ + self.simulator.model.data.qpos.flatten()[1:], + self.simulator.model.data.qvel.flat, + self.get_body_com('torso').flat, + ]) + obs['im'] = current_obs + return obs class RewardCheetah(BaseRewarder): - @property - def action(self): - return self.prms['sim'].action if hasattr(self.prms['sim'], 'action') else np.zeros((1,2)) + @property + def action(self): + return self.prms['sim'].action if hasattr(self.prms['sim'], 'action') else np.zeros((1,2)) + + @property + def body_comvel(self): + return self.prms['sim'].body_comvel if hasattr(self.prms['sim'], 'body_comvel') else 0 + + @overrides + def get(self): + ctrl_cost = 1e-1 * 0.5 * np.sum(np.square(self.action)) + run_cost = -1 * self.body_comvel[0] + return -(ctrl_cost + run_cost) + +class ContinuousCheetahAction(BaseContinuousAction): + @overrides + def action_dim(self): + return 6 + + def minval(self): + return -1.25 + + def maxval(self): + return 1.1 + + @overrides + def process(self, action): + return action + +class InitCheetah(BaseInitializer): - @property - def body_comvel(self): - return self.prms['sim'].body_comvel if hasattr(self.prms['sim'], 'body_comvel') else 0 + @overrides + def sample_env_init(self): + pass - @overrides - def get(self): - ctrl_cost = 1e-1 * 0.5 * np.sum(np.square(self.action)) - run_cost = -1 * self.body_comvel[0] - return ctrl_cost + run_cost - \ No newline at end of file +def get_environment(max_episode_length=100, initPrms={}, obsPrms={}, actPrms={}): + sim = HalfCheetahSimulator() + initObj = InitCheetah(sim, initPrms) + obsObj = CheetahIm(sim, obsPrms) + rewPrms = { 'sim': sim } + rewObj = RewardCheetah(sim, rewPrms) + actObj = ContinuousCheetahAction(actPrms) + env = BaseEnvironment(sim, initObj, obsObj, rewObj, actObj, + params={'max_episode_length':max_episode_length}) + return env diff --git a/envs/move_agent.py b/envs/move_agent.py index a016e9e..ae3f90a 100644 --- a/envs/move_agent.py +++ b/envs/move_agent.py @@ -212,149 +212,6 @@ def get(self): else: return 0 -class StackedBox(): - - def __init__(self, pos, width=3, height=3, length=3): - assert len(pos) == 3, 'must enter an xyz position' - - self.pos = pos - self.width = width - self.height = height - self.length = length - - def contains(self, new_pos): - x, y, z = self.pos - new_x, new_y, new_z = new_pos - return new_x >= x and new_x <= x + self.width \ - and new_y >= y and new_y <= y + self.height and new_z >= z and new_z <= z + self.length - -class SimpleStackerSimulator(MoveTeleportSimulator): - def __init__(self, **kwargs): - super(SimpleStackerSimulator, self).__init__(**kwargs) - self.width = self._imSz - self.height = self._imSz - self.block1 = StackedBox(np.random.randint(0, 32 - 3, size=3)) - self.block1.pos[2] = 0 - self.block2 = StackedBox(np.random.randint(0, 32 - 3, size=3)) - self.block2.pos[2] = 0 - if self.block1.contains(self.block2.pos): - self.block2.pos[2] = self.block1.height - - self._imSz = 32 - self._im = np.zeros((self._imSz, self._imSz, 3), dtype=np.uint8) - self._range_min = 0 - self._range_max = 32 - self.block1.width - - @overrides - def step(self, pos): - assert len(pos) == 2, 'step takes in an xy position' - - x, y = min(pos[0], 32 - 3), min(pos[1], 32 - 3) - self.block2.pos = np.array([x, y, 0]) - if self.block1.contains(self.block2.pos): - self.block2.pos[2] = self.block1.height - - @overrides - def _plot_object(self, coords, color='r'): - x, y = coords - mnx, mxx = x, min(self._imSz, x + self.block1.width) - mny, mxy = y, min(self._imSz, y + self.block1.height) - if color == 'r': - self._im[mny:mxy, mnx:mxx, 0] = 255 - elif color == 'g': - self._im[mny:mxy, mnx:mxx, 1] = 255 - else: - self._im[mny:mxy, mnx:mxx, 2] = 255 - - @overrides - def get_image(self): - imSz = self._imSz - rng = np.linspace(self._range_min, self._range_max, imSz) - x_1, y_1 = self.block1.pos[0], self.block1.pos[1] - x_2, y_2 = self.block2.pos[0], self.block2.pos[1] - self._im = np.zeros((imSz, imSz, 3), dtype=np.uint8) - self._plot_object((x_1, y_1), 'r') - self._plot_object((x_2, y_2), 'g') - return self._im.copy() - -class StackerIm(ObsIm): - - @overrides - def observation(self): - obs = {} - obs['im'] = self.simulator.get_image().flatten() - return obs - -class RewardStacker(BaseRewarder): - - @property - def block_height(self): - return self.prms['sim'].block2.pos[2] if hasattr(self.prms['sim'], 'block2') else 0 - - @overrides - def get(self): - return self.block_height - -from rlmaster.envs.move_agent import MoveTeleportSimulator, ObsIm, BaseRewarder - -class HalfCheetahSimulator(MoveTeleportSimulator): - def __init__(self, **kwargs): - super(HalfCheetahSimulator, self).__init__(**kwargs) - self.model = MjModel('../rlmaster/envs/mujoco_envs/xmls/half_cheetah.xml') - self._pos = {} - self._pos['torso'] = np.zeros((3,)) - self._range_min = -1 - self._range_max = 1 - - self._imSz = 32 - self._im = np.zeros((self._imSz, self._imSz, 3), dtype=np.uint8) - self.body_comvel = 0 - self.action = np.zeros((1, 2)) - - @overrides - def step(self, ctrl): - self.model.data.ctrl = ctrl - for i in range(10): - self.model.step() - self.model.forward() - ind = self.model.body_names.index('torso') - self._pos['torso'] = self.model.body_pos[ind] - self.body_comvel = self.model.body_comvels[ind] - self.action = ctrl - - @overrides - def get_image(self): - imSz = self._imSz - rng = np.linspace(self._range_min, self._range_max, imSz) - g_x, g_y = self._get_bin(rng, self._pos['torso']) - self._im = np.zeros((imSz, imSz, 3), dtype=np.uint8) - self._plot_object((g_x, g_y), 'r') - return self._im.copy() - -class CheetahIm(ObsIm): - @overrides - def observation(self): - obs = {} - obs['im'] = self.simulator.get_image().flatten() - return obs - -class RewardCheetah(BaseRewarder): - - @property - def action(self): - return self.prms['sim'].action if hasattr(self.prms['sim'], 'action') else np.zeros((1,2)) - - @property - def body_comvel(self): - return self.prms['sim'].body_comvel if hasattr(self.prms['sim'], 'body_comvel') else 0 - - @overrides - def get(self): - ctrl_cost = 1e-1 * 0.5 * np.sum(np.square(self.action)) - run_cost = -1 * self.body_comvel[0] - return ctrl_cost + run_cost - - def get_environment(initName='InitRandom', obsName='ObsIm', rewName='RewardSimple', actType='DiscreteActionFour', simName='MoveTeleportSimulator', max_episode_length=100, initPrms={}, obsPrms={}, rewPrms={}, diff --git a/envs/stacker_agent.py b/envs/stacker_agent.py index fd00822..0a53308 100644 --- a/envs/stacker_agent.py +++ b/envs/stacker_agent.py @@ -1,4 +1,7 @@ -from move_agent import MoveTeleportSimulator, ObsIm, BaseRewarder +from rlmaster.core.base_environment import * +import numpy as np +from overrides import overrides +from pyhelper_fns import vis_utils class StackedBox(): @@ -16,19 +19,15 @@ def contains(self, new_pos): return new_x >= x and new_x <= x + self.width \ and new_y >= y and new_y <= y + self.height and new_z >= z and new_z <= z + self.length -class SimpleStackerSimulator(MoveTeleportSimulator): +class SimpleStackerSimulator(BaseSimulator): def __init__(self, **kwargs): super(SimpleStackerSimulator, self).__init__(**kwargs) + self._imSz = 32 self.width = self._imSz self.height = self._imSz - self.block1 = StackedBox(np.random.randint(0, 32 - 3, size=3)) - self.block1.pos[2] = 0 - self.block2 = StackedBox(np.random.randint(0, 32 - 3, size=3)) - self.block2.pos[2] = 0 - if self.block1.contains(self.block2.pos): - self.block2.pos[2] = self.block1.height + self.block1 = StackedBox(np.zeros(3)) + self.block2 = StackedBox(np.zeros(3)) - self._imSz = 32 self._im = np.zeros((self._imSz, self._imSz, 3), dtype=np.uint8) self._range_min = 0 self._range_max = 32 - self.block1.width @@ -42,11 +41,10 @@ def step(self, pos): if self.block1.contains(self.block2.pos): self.block2.pos[2] = self.block1.height - @overrides def _plot_object(self, coords, color='r'): x, y = coords - mnx, mxx = x, min(self._imSz, x + self.block1.width) - mny, mxy = y, min(self._imSz, y + self.block1.height) + mnx, mxx = int(x), int(min(self._imSz, x + self.block1.width)) + mny, mxy = int(y), int(min(self._imSz, y + self.block1.height)) if color == 'r': self._im[mny:mxy, mnx:mxx, 0] = 255 elif color == 'g': @@ -65,12 +63,26 @@ def get_image(self): self._plot_object((x_2, y_2), 'g') return self._im.copy() -class StackerIm(ObsIm): + @overrides + def _setup_renderer(self): + self._canvas = vis_utils.MyAnimation(None, height=self._imSz, width=self._imSz) + + @overrides + def render(self): + self._canvas._display(self.get_image()) + +class StackerIm(BaseObservation): + + @overrides + def ndim(self): + dim = {} + dim['im'] = (self.simulator._imSz, self.simulator._imSz, 3) + return dim @overrides def observation(self): obs = {} - obs['im'] = self.simulator.get_image().flatten() + obs['im'] = self.simulator.get_image() return obs class RewardStacker(BaseRewarder): @@ -82,3 +94,39 @@ def block_height(self): @overrides def get(self): return self.block_height + +class ContinuousStackerAction(BaseContinuousAction): + @overrides + def action_dim(self): + return 2 + + @overrides + def process(self, action): + return np.around(action, decimals=0) + + def minval(self): + return 0 + + def maxval(self): + return 32 + +class InitStacker(BaseInitializer): + @overrides + def sample_env_init(self): + self.simulator.block1.pos = np.random.randint(0, 32 - 3, size=3) + self.simulator.block1.pos[2] = 0 + self.simulator.block2.pos = np.random.randint(0, 32 - 3, size=3) + self.simulator.block2.pos[2] = 0 + if self.simulator.block1.contains(self.simulator.block2.pos): + self.simulator.block2.pos[2] = self.simulator.block1.height + +def get_environment(max_episode_length=100, initPrms={}, obsPrms={}, rewPrms={}, actPrms={}): + sim = SimpleStackerSimulator() + initObj = InitStacker(initPrms) + obsObj = StackerIm(sim, obsPrms) + rewPrms = { 'sim': sim } + rewObj = RewardStacker(sim, rewPrms) + actObj = ContinuousStackerAction(actPrms) + env = BaseEnvironment(sim, initObj, obsObj, rewObj, actObj, + params={'max_episode_length':max_episode_length}) + return env From 3e94883fca10bf3d0d7cfbe8a32b1b99da030983 Mon Sep 17 00:00:00 2001 From: JasmineDeng Date: Sun, 26 Mar 2017 03:11:55 -0700 Subject: [PATCH 06/10] initialized cheetah correctly, modified README --- README.md | 10 ++++++---- envs/cheetah_agent.py | 14 ++++++++++++-- 2 files changed, 18 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index dd1fa7c..3e58f48 100644 --- a/README.md +++ b/README.md @@ -20,8 +20,9 @@ An environment is defined by: - ActionProcessor: an object of `type BaseAction` that defines functions that process the action. Generally simulator works by applying continous forces/torque, however the desired action space might be discrete or continuous. `BaseAction` objects allow the user to easily switch between different action spaces. - + ### Interactive Mode + To run environments in interactive mode, a function mapping the keyboard inputs into commands supplied to the agent must be defined. This function is typically called, `str2action` in `rlmaster` repository. The environment can be run in interactive mode in the following way: ``` @@ -31,7 +32,9 @@ env.interactive(move_single_env.str2action) ``` You can use the commands, `w, s, d, a` to move the agent and `q` to quit the interactive mode. - +### Cheetah in Interactive Mode + +No interface has been developed yet, but after creating an instance of SimpleStackerAgent in from `stacker_agent`, calling `_setup_renderer` and `render` will open a display window. You can step by entering an xy position that will move the second block there instantly. Calling `render` again will show the updated positions of the blocks. They are initialized at position (0,0) if not called with the initializer. ##Environment in openAI gym format @@ -41,8 +44,7 @@ from core import gym_wrapper env = move_agent.get_environment() gymEnv = gym_wwapper.GymWrapper(env) ``` - - + diff --git a/envs/cheetah_agent.py b/envs/cheetah_agent.py index ca20471..973da33 100644 --- a/envs/cheetah_agent.py +++ b/envs/cheetah_agent.py @@ -19,7 +19,10 @@ def __init__(self, **kwargs): self.body_comvel = 0 self.action = np.zeros((1, 2)) - + self.init_qpos = self.model.data.qpos + self.init_qvel = self.model.data.qvel + self.init_qacc = self.model.data.qacc + self.init_ctrl = self.model.data.ctrl self.frame_skip = 1 @overrides @@ -107,9 +110,16 @@ def process(self, action): # TODO(jasmine): fix this so it initializes (is it called at the start of each epoch? find out) class InitCheetah(BaseInitializer): + def reset_mujoco(self): + self.simulator.model.data.qpos = self.simulator.init_qpos + self.simulator.model.data.qvel = self.simulator.init_qvel + self.simulator.model.data.qacc = self.simulator.init_qacc + self.simulator.model.data.ctrl = self.simulator.init_ctrl + @overrides def sample_env_init(self): - pass + self.reset_mujoco() + self.simulator.model.forward() def get_environment(max_episode_length=100, initPrms={}, obsPrms={}, actPrms={}): sim = HalfCheetahSimulator() From 1625572f0c3a5c9485b4f5829cc87fd3750ffb58 Mon Sep 17 00:00:00 2001 From: JasmineDeng Date: Fri, 7 Apr 2017 18:13:59 -0700 Subject: [PATCH 07/10] working stacker environment --- envs/cheetah_agent.py | 10 ++- envs/move_agent.py | 6 +- envs/reacher_agent.py | 196 ++++++++++++++++++++++++++++++++++++++++++ envs/stacker_agent.py | 114 +++++++++++++++--------- 4 files changed, 278 insertions(+), 48 deletions(-) create mode 100644 envs/reacher_agent.py diff --git a/envs/cheetah_agent.py b/envs/cheetah_agent.py index 973da33..cc2351b 100644 --- a/envs/cheetah_agent.py +++ b/envs/cheetah_agent.py @@ -27,7 +27,9 @@ def __init__(self, **kwargs): @overrides def step(self, ctrl, loop=False): - self.model.data.ctrl = ctrl + np.random.normal(size=ctrl.shape) + ctrl = np.clip(ctrl, *(np.array([-1, -1, -1, -1, -1, -1]), np.array([1, 1, 1, 1, 1, 1]))) + self.model.data.ctrl = ctrl # + np.random.normal(size=ctrl.shape) + # print('gym', self.model.data.ctrl) for i in range(self.frame_skip): self.model.step() self.model.forward() @@ -40,7 +42,7 @@ def step(self, ctrl, loop=False): def get_image(self): data, width, height = self.viewer.get_image() self._im = np.fromstring(data, dtype='uint8').reshape(height, width, 3)[::-1,:,:] - print(self._im) + # print(self._im) return self._im.copy() @overrides @@ -111,8 +113,8 @@ def process(self, action): class InitCheetah(BaseInitializer): def reset_mujoco(self): - self.simulator.model.data.qpos = self.simulator.init_qpos - self.simulator.model.data.qvel = self.simulator.init_qvel + self.simulator.model.data.qpos = self.simulator.init_qpos + 0.01 * np.random.normal(size=self.simulator.init_qpos.shape) + self.simulator.model.data.qvel = self.simulator.init_qvel + 0.1 * np.random.normal(size=self.simulator.init_qvel.shape) self.simulator.model.data.qacc = self.simulator.init_qacc self.simulator.model.data.ctrl = self.simulator.init_ctrl diff --git a/envs/move_agent.py b/envs/move_agent.py index ae3f90a..e8c78ff 100644 --- a/envs/move_agent.py +++ b/envs/move_agent.py @@ -184,19 +184,19 @@ def observation(self): for i, k in enumerate(self.simulator._pos.keys()): obs[2*i, 2*i + 2] = self.simulator._pos[k].copy() return obs - class ObsIm(BaseObservation): @overrides def ndim(self): dim = {} - dim['im'] = (self.simulator._imSz, self.simulator._imSz, 3) + # dim['im'] = (self.simulator._imSz, self.simulator._imSz, 3) + dim['im'] = (3072, 1) return dim @overrides def observation(self): obs = {} - obs['im'] = self.simulator.get_image() + obs['im'] = self.simulator.get_image().flatten() return obs class RewardSimple(BaseRewarder): diff --git a/envs/reacher_agent.py b/envs/reacher_agent.py new file mode 100644 index 0000000..4789b4a --- /dev/null +++ b/envs/reacher_agent.py @@ -0,0 +1,196 @@ +from rlmaster.core.base_environment import * +import numpy as np +from overrides import overrides +from pyhelper_fns import vis_utils + +def contains(obj1, obj2): + x1, y1, _ = obj1.pos.tolist() + x2, y2, _ = obj2.pos.tolist() + s1, s2 = obj1.size, obj2.size + if x2 + w2 <= x1 or x1 + w1 <= x2: + return False + if y2 + l2 <= y1 or y1 + l1 <= y2: + return False + return True + +class StackedBox(): + + def __init__(self, pos, size=3): + assert len(pos) == 3, 'must enter an xyz position' + + self.pos = pos.reshape((3,)) + # height is the height in 3d, box in 2d is width x length + self.size = size + +class ReacherArm(): + + def __init__(self, pos, size=4): + # takes in xyz position, but z is implicitly infinity + self.pos = pos.reshape((2,)) + self.size = size + + self.hold_block = False + +class SimpleReacherSimulator(BaseSimulator): + def __init__(self, **kwargs): + # should always have same size blocks (width = height = length = n) + super(SimpleStackerSimulator, self).__init__(**kwargs) + self._imSz = 32 + self.width = self._imSz + self.height = self._imSz + self.move_block = StackedBox(np.zeros(3)) + self.other_block = StackedBox(np.zeros(3)) + + self.reacher_arm = ReacherArm(np.zeros(3)) + + self._im = np.zeros((self._imSz, self._imSz, 3), dtype=np.uint8) + self._range_min = 0 + self._range_max = 32 - self.move_block.size + + @overrides + def step(self, action): + # should take in xyz position, but z position doesn't matter + action = action.reshape((5,)) + pos = action[:3] + grab_act = action[3] < 4 + drop_act = action[4] < 4 + print(grab_act, drop_act) + + if grab_act: + if contains(self.reacher_arm, self.move_block): + self.reacher_arm.hold_block = True + + elif drop_act: + if self.reacher_arm.hold_block: + self.reacher_arm.hold_block = False + self.move_block.pos[2] = 0 + if contains(self.other_block, self.move_block): + self.move_block.pos[2] = self.other_block.size + + if self.reacher_arm.hold_block: + delta = pos[0] - self.reacher_arm.pos[0], pos[1] - self.reacher_arm.pos[1] + self.move_block.pos = np.array([self.move_block.pos[0] + delta[0], + self.move_block.pos[1] + delta[1], 10]) # to preserve relative position + + self.reacher_arm.pos = pos + + def _plot_object(self, coords, box, color='r'): + x, y = coords + width, length = box[0], box[1] + mnx, mxx = int(x), int(min(self._imSz, x + width)) + mny, mxy = int(y), int(min(self._imSz, y + length)) + if color == 'r': + self._im[mny:mxy, mnx:mxx, 0] = 255 + elif color == 'g': + self._im[mny:mxy, mnx:mxx, 1] = 255 + else: + self._im[mny:mxy, mnx:mxx, 2] = 255 + + @overrides + def get_image(self): + # other blocks in red, active block in green + imSz = self._imSz + self._im = np.zeros((imSz, imSz, 3), dtype=np.uint8) + x, y = self.other_block.pos[0], self.other_block.pos[1] + self._plot_object((x, y), (3, 3), 'r') + x, y = self.move_block.pos[0], self.move_block.pos[1] + self._plot_object((x, y), (3, 3), 'g') + x, y = self.reacher_arm.pos[0], self.reacher_arm.pos[1] + self._plot_object((x, y), (4, 4), 'b') + return self._im.copy() + + @overrides + def _setup_renderer(self): + self._canvas = vis_utils.MyAnimation(None, height=self._imSz, width=self._imSz) + + @overrides + def render(self): + self._canvas._display(self.get_image()) + +class StackerIm(BaseObservation): + + @overrides + def ndim(self): + dim = {} + dim['im'] = (self.simulator._imSz, self.simulator._imSz, 3) + return dim + + @overrides + def observation(self): + obs = {} + obs['im'] = self.simulator.get_image() + return obs + +class StateIm(BaseObservation): + + @overrides + def ndim(self): + dim = {} + dim['im'] = (9, 1) + return dim + + @overrides + def observation(self): + obs = {} + new_im = np.append(self.simulator.move_block.pos, self.simulator.other_block.pos) + new_im = np.append(new_im, self.simulator.reacher_arm.pos[:2]) + new_im = np.append(new_im, np.array([1 if self.simulator.reacher_arm.hold_block else 0])) + obs['im'] = new_im + return obs + +####@pulkitag: This is good. +class RewardReacher(BaseRewarder): + + @property + def contains_block(self): + if hasattr(self.prms['sim'], 'reacher_arm') and hasattr(self.prms['sim'], 'move_block') and hasattr(self.prms['sim'], 'other_block'): + move_block = self.prms['sim'].move_block0, 0, 0, + other_block = self.prms['sim'].other_block + reacher_arm = self.prms['sim'].reacher_arm + return 1 if move_block.pos[2] == other_block.size and not reacher_arm.hold_block else 0 + return 0 + + @overrides + def get(self): + return self.contains_block + +# need additional actions: to pick up and to drop the block +class ContinuousReacherAction(BaseContinuousAction): + @overrides + def action_dim(self): + return 5 + + @overrides + def process(self, action): + return np.around(action, decimals=0) + + def minval(self): + return 0 + + def maxval(self): + return 32 + +class InitReacher(BaseInitializer): + @overrides + def sample_env_init(self): + sim = self.simulator['sim'] + sim.reacher_arm.pos = np.random.randint(0, 32 - sim.reacher_arm.size, size=3) + sim.reacher_arm.hold_block = False + sim.move_block.pos = np.random.randint(0, 32 - sim.move_block.size, size=3) + sim.move_block.pos[2] = 0 + sim.other_block.pos = np.random.randint(0, 32 - sim.move_block.size, size=3) + sim.other_block.pos[2] = 0 + if contains(sim.other_block, sim.move_block): + sim.move_block.pos[2] = sim.other_block.size + +def get_environment(obsType='StateIm', max_episode_length=100, initPrms={}, obsPrms={}, rewPrms={}, actPrms={}): + sim = SimpleReacherSimulator() + initPrms = {'sim' : sim} + initObj = InitReacher(initPrms) + obsObj = globals()[obsType](sim, obsPrms) + rewPrms = { 'sim': sim } + rewObj = RewardReacher(sim, rewPrms) + actObj = ContinuousReacherAction(actPrms) + env = BaseEnvironment(sim, initObj, obsObj, rewObj, actObj, + params={'max_episode_length':max_episode_length}) + return env diff --git a/envs/stacker_agent.py b/envs/stacker_agent.py index 9b5a1d5..d97c84f 100644 --- a/envs/stacker_agent.py +++ b/envs/stacker_agent.py @@ -3,21 +3,22 @@ from overrides import overrides from pyhelper_fns import vis_utils -class StackedBox(): - - def __init__(self, pos, width=3, height=3, length=3): - assert len(pos) == 3, 'must enter an xyz position' +def contains(obj1, obj2): + x1, y1, _ = obj1.pos.tolist() + x2, y2, _ = obj2.pos.tolist() + s1, s2 = obj1.size, obj2.size + if x2 + s2 <= x1 or x1 + s1 <= x2: + return False + if y2 + s2 <= y1 or y1 + s1 <= y2: + return False + return True - self.pos = pos - self.width = width - self.height = height - self.length = length +class StackedBox(): - def contains(self, new_pos): - x, y, z = self.pos - new_x, new_y, new_z = new_pos - return new_x >= x and new_x <= x + self.width \ - and new_y >= y and new_y <= y + self.height and new_z >= z and new_z <= z + self.length + def __init__(self, pos, size=2): + # position is center of the box + self.pos = pos.reshape((3,)) + self.size = size class SimpleStackerSimulator(BaseSimulator): def __init__(self, **kwargs): @@ -25,28 +26,30 @@ def __init__(self, **kwargs): self._imSz = 32 self.width = self._imSz self.height = self._imSz - self.block1 = StackedBox(np.zeros(3)) - self.block2 = StackedBox(np.zeros(3)) + self.move_block = StackedBox(np.zeros(3)) + self.other_block = StackedBox(np.zeros(3)) self._im = np.zeros((self._imSz, self._imSz, 3), dtype=np.uint8) - self._range_min = 0 - self._range_max = 32 - self.block1.width + self._range_min = self.move_block.size + self._range_max = self._imSz - self.move_block.size @overrides def step(self, pos): #####@pulkitag: You should make use of BaseDiscreteAction and BaseContinuousAction #### to input the actions. - assert len(pos) == 2, 'step takes in an xy position' - - x, y = min(pos[0], 32 - 3), min(pos[1], 32 - 3) - self.block2.pos = np.array([x, y, 0]) - if self.block1.contains(self.block2.pos): - self.block2.pos[2] = self.block1.height + pos = pos.reshape((2,)) + pos = np.append(pos, np.zeros(1)) + + new_pos = np.clip(self.move_block.pos + pos, self._range_min, self._range_max) + new_pos[2] = 0 + self.move_block.pos = new_pos + if contains(self.move_block, self.other_block): + self.move_block.pos[2] = self.other_block.size def _plot_object(self, coords, color='r'): x, y = coords - mnx, mxx = int(x), int(min(self._imSz, x + self.block1.width)) - mny, mxy = int(y), int(min(self._imSz, y + self.block1.height)) + mnx, mxx = int(x), int(min(self._imSz, x + self.move_block.size)) + mny, mxy = int(y), int(min(self._imSz, y + self.move_block.size)) if color == 'r': self._im[mny:mxy, mnx:mxx, 0] = 255 elif color == 'g': @@ -58,8 +61,8 @@ def _plot_object(self, coords, color='r'): def get_image(self): imSz = self._imSz rng = np.linspace(self._range_min, self._range_max, imSz) - x_1, y_1 = self.block1.pos[0], self.block1.pos[1] - x_2, y_2 = self.block2.pos[0], self.block2.pos[1] + x_1, y_1 = self.other_block.pos[0], self.other_block.pos[1] + x_2, y_2 = self.move_block.pos[0], self.move_block.pos[1] self._im = np.zeros((imSz, imSz, 3), dtype=np.uint8) self._plot_object((x_1, y_1), 'r') self._plot_object((x_2, y_2), 'g') @@ -87,12 +90,38 @@ def observation(self): obs['im'] = self.simulator.get_image() return obs +class StateIm(BaseObservation): + + @overrides + def ndim(self): + dim = {} + dim['im'] = (6, 1) + return dim + + def scale(self, obs): + norm = np.linalg.norm(obs) + obs = np.copy(obs) + obs = obs / 16 - 1 + return obs + + @overrides + def observation(self): + obs = {} + m_pos = self.simulator.move_block.pos + o_pos = self.simulator.other_block.pos + new_im = np.append(self.scale(m_pos), self.scale(o_pos)) + obs['im'] = new_im + return obs + + ####@pulkitag: This is good. class RewardStacker(BaseRewarder): @property def block_height(self): - return self.prms['sim'].block2.pos[2] if hasattr(self.prms['sim'], 'block2') else 0 + if hasattr(self.prms['sim'], 'other_block') and hasattr(self.prms['sim'], 'move_block'): + return 1 if self.prms['sim'].move_block.pos[2] == self.prms['sim'].other_block.size else 0 + return 0 @overrides def get(self): @@ -105,29 +134,32 @@ def action_dim(self): @overrides def process(self, action): - return np.around(action, decimals=0) + return action def minval(self): - return 0 + return -1 def maxval(self): - return 32 + return 1 class InitStacker(BaseInitializer): @overrides def sample_env_init(self): - self.simulator.block1.pos = np.random.randint(0, 32 - 3, size=3) - self.simulator.block1.pos[2] = 0 - self.simulator.block2.pos = np.random.randint(0, 32 - 3, size=3) - self.simulator.block2.pos[2] = 0 - if self.simulator.block1.contains(self.simulator.block2.pos): - self.simulator.block2.pos[2] = self.simulator.block1.height - -def get_environment(max_episode_length=100, initPrms={}, obsPrms={}, rewPrms={}, actPrms={}): + sim = self.simulator['sim'] + size = sim.move_block.size + sim.move_block.pos = np.random.randint(sim._range_min, sim._range_max, size=3) + sim.move_block.pos[2] = 0 + sim.other_block.pos = np.random.randint(sim._range_min, sim._range_max, size=3) + sim.other_block.pos[2] = 0 + if contains(sim.move_block, sim.other_block): + sim.move_block.pos[2] = size + +def get_environment(obsType='StateIm', max_episode_length=100, initPrms={}, obsPrms={}, rewPrms={}, actPrms={}): sim = SimpleStackerSimulator() - initObj = InitStacker(initPrms) - obsObj = StackerIm(sim, obsPrms) rewPrms = { 'sim': sim } + initPrms = { 'sim': sim } + initObj = InitStacker(initPrms) + obsObj = globals()[obsType](sim, obsPrms) rewObj = RewardStacker(sim, rewPrms) actObj = ContinuousStackerAction(actPrms) env = BaseEnvironment(sim, initObj, obsObj, rewObj, actObj, From 4ce904bdbfde0ad4e3af5aa479b85829b97a0b46 Mon Sep 17 00:00:00 2001 From: JasmineDeng Date: Mon, 17 Apr 2017 20:23:13 -0700 Subject: [PATCH 08/10] two blocks in reacher_agent now --- core/gym_wrapper.py | 1 + envs/reacher_agent.py | 174 ++++++++++++++++++++++++------------------ envs/stacker_agent.py | 11 +-- 3 files changed, 106 insertions(+), 80 deletions(-) diff --git a/core/gym_wrapper.py b/core/gym_wrapper.py index ebde5a8..3a08bf5 100644 --- a/core/gym_wrapper.py +++ b/core/gym_wrapper.py @@ -62,6 +62,7 @@ def _step(self, action): self.env.step(action) obs = self._observation() reward = self.env.reward() + # done = self.env.simulator.done # test? done = False return obs, reward, done, dict(reward=reward) diff --git a/envs/reacher_agent.py b/envs/reacher_agent.py index 4789b4a..04d9f69 100644 --- a/envs/reacher_agent.py +++ b/envs/reacher_agent.py @@ -4,81 +4,85 @@ from pyhelper_fns import vis_utils def contains(obj1, obj2): - x1, y1, _ = obj1.pos.tolist() - x2, y2, _ = obj2.pos.tolist() + if not obj1 or not obj2: + return False + x1, y1 = obj1.pos[0], obj1.pos[1] + x2, y2 = obj2.pos[0], obj2.pos[1] s1, s2 = obj1.size, obj2.size - if x2 + w2 <= x1 or x1 + w1 <= x2: + x1, y1 = x1, y1 + x2, y2 = x2, y2 + if x2 + 2 * s2 <= x1 or x1 + 2 * s1 <= x2: return False - if y2 + l2 <= y1 or y1 + l1 <= y2: + if y2 + 2 * s2 <= y1 or y1 + 2 * s1 <= y2: return False return True class StackedBox(): - - def __init__(self, pos, size=3): - assert len(pos) == 3, 'must enter an xyz position' - + def __init__(self, pos, size=2): self.pos = pos.reshape((3,)) - # height is the height in 3d, box in 2d is width x length self.size = size class ReacherArm(): - - def __init__(self, pos, size=4): - # takes in xyz position, but z is implicitly infinity + def __init__(self, pos, size=2): self.pos = pos.reshape((2,)) self.size = size - self.hold_block = False + self.block = None class SimpleReacherSimulator(BaseSimulator): def __init__(self, **kwargs): # should always have same size blocks (width = height = length = n) - super(SimpleStackerSimulator, self).__init__(**kwargs) + super(SimpleReacherSimulator, self).__init__(**kwargs) self._imSz = 32 self.width = self._imSz self.height = self._imSz - self.move_block = StackedBox(np.zeros(3)) - self.other_block = StackedBox(np.zeros(3)) - - self.reacher_arm = ReacherArm(np.zeros(3)) + self.move_block = [StackedBox(np.zeros(3)), StackedBox(np.zeros(3))] + self.goal = StackedBox(np.zeros(3)) + self.reacher_arm = ReacherArm(np.zeros(2)) self._im = np.zeros((self._imSz, self._imSz, 3), dtype=np.uint8) - self._range_min = 0 - self._range_max = 32 - self.move_block.size + self._range_min = self.reacher_arm.size + self._range_max = 32 - self.reacher_arm.size + + @property + def done(self): + for block in self.move_block: + if block.pos[2] != self.goal.size: + return False + return True @overrides def step(self, action): - # should take in xyz position, but z position doesn't matter - action = action.reshape((5,)) - pos = action[:3] - grab_act = action[3] < 4 - drop_act = action[4] < 4 - print(grab_act, drop_act) - - if grab_act: - if contains(self.reacher_arm, self.move_block): - self.reacher_arm.hold_block = True - - elif drop_act: - if self.reacher_arm.hold_block: - self.reacher_arm.hold_block = False - self.move_block.pos[2] = 0 - if contains(self.other_block, self.move_block): - self.move_block.pos[2] = self.other_block.size - - if self.reacher_arm.hold_block: - delta = pos[0] - self.reacher_arm.pos[0], pos[1] - self.reacher_arm.pos[1] - self.move_block.pos = np.array([self.move_block.pos[0] + delta[0], - self.move_block.pos[1] + delta[1], 10]) # to preserve relative position - - self.reacher_arm.pos = pos - + action = action.reshape((2,)) + new_pos = self.reacher_arm.pos + action + + on_goal = contains(self.reacher_arm.block, self.goal) + on_block = None + for block in self.move_block: + if contains(self.reacher_arm, block): + on_block = block + break + + if self.reacher_arm.block is None and on_block is not None and not contains(on_block, self.goal): + self.reacher_arm.block = on_block + + if on_goal: + self.reacher_arm.block.pos[2] = self.goal.size + self.reacher_arm.block = None + + if self.reacher_arm.block is not None: + block = self.reacher_arm.block + delta = new_pos[0] - self.reacher_arm.pos[0], new_pos[1] - self.reacher_arm.pos[1] + block.pos = np.array([block.pos[0] + delta[0], + block.pos[1] + delta[1], 32]) # to preserve relative position + block.pos = np.clip(block.pos, self._range_min, self._range_max) + + self.reacher_arm.pos = np.clip(new_pos, self._range_min, self._range_max) + def _plot_object(self, coords, box, color='r'): x, y = coords - width, length = box[0], box[1] - mnx, mxx = int(x), int(min(self._imSz, x + width)) - mny, mxy = int(y), int(min(self._imSz, y + length)) + mnx, mxx = int(max(x - box[0], 0)), int(min(self._imSz, x + box[0])) + mny, mxy = int(max(y - box[1], 0)), int(min(self._imSz, y + box[1])) if color == 'r': self._im[mny:mxy, mnx:mxx, 0] = 255 elif color == 'g': @@ -91,12 +95,13 @@ def get_image(self): # other blocks in red, active block in green imSz = self._imSz self._im = np.zeros((imSz, imSz, 3), dtype=np.uint8) - x, y = self.other_block.pos[0], self.other_block.pos[1] - self._plot_object((x, y), (3, 3), 'r') - x, y = self.move_block.pos[0], self.move_block.pos[1] - self._plot_object((x, y), (3, 3), 'g') + x, y = self.goal.pos[0], self.goal.pos[1] + self._plot_object((x, y), (2, 2), 'r') + for block in self.move_block: + x, y = block.pos[0], block.pos[1] + self._plot_object((x, y), (2, 2), 'g') x, y = self.reacher_arm.pos[0], self.reacher_arm.pos[1] - self._plot_object((x, y), (4, 4), 'b') + self._plot_object((x, y), (2, 2), 'b') return self._im.copy() @overrides @@ -105,6 +110,8 @@ def _setup_renderer(self): @overrides def render(self): + if not hasattr(self, '_canvas'): + self._setup_renderer() self._canvas._display(self.get_image()) class StackerIm(BaseObservation): @@ -126,28 +133,42 @@ class StateIm(BaseObservation): @overrides def ndim(self): dim = {} - dim['im'] = (9, 1) + num_blocks = len(self.simulator.move_block) + dim['im'] = (num_blocks * 3 + 6, 1) return dim + def scale(self, obs): + obs = np.copy(obs) + obs = obs / 16 - 1 + return obs + @overrides def observation(self): obs = {} - new_im = np.append(self.simulator.move_block.pos, self.simulator.other_block.pos) - new_im = np.append(new_im, self.simulator.reacher_arm.pos[:2]) - new_im = np.append(new_im, np.array([1 if self.simulator.reacher_arm.hold_block else 0])) + + m_block = self.simulator.move_block + g_pos = self.simulator.goal.pos + r_pos = self.simulator.reacher_arm.pos + new_im = np.append(self.scale(g_pos), self.scale(r_pos)) + new_im = np.append(new_im, np.array([1 if self.simulator.reacher_arm.block is not None else 0])) + for b in m_block: + new_im = np.append(new_im, self.scale(b.pos)) obs['im'] = new_im return obs ####@pulkitag: This is good. class RewardReacher(BaseRewarder): - + @property def contains_block(self): - if hasattr(self.prms['sim'], 'reacher_arm') and hasattr(self.prms['sim'], 'move_block') and hasattr(self.prms['sim'], 'other_block'): - move_block = self.prms['sim'].move_block0, 0, 0, - other_block = self.prms['sim'].other_block - reacher_arm = self.prms['sim'].reacher_arm - return 1 if move_block.pos[2] == other_block.size and not reacher_arm.hold_block else 0 + if hasattr(self.prms['sim'], 'move_block') and hasattr(self.prms['sim'], 'goal'): + move_block = self.prms['sim'].move_block + goal = self.prms['sim'].goal + reward = 0 + for block in move_block: + if block.pos[2] == goal.size: + reward += 1 + return reward / float(len(move_block)) return 0 @overrides @@ -158,30 +179,33 @@ def get(self): class ContinuousReacherAction(BaseContinuousAction): @overrides def action_dim(self): - return 5 + return 2 @overrides def process(self, action): - return np.around(action, decimals=0) + return action def minval(self): - return 0 + return -1 def maxval(self): - return 32 + return 1 class InitReacher(BaseInitializer): @overrides def sample_env_init(self): sim = self.simulator['sim'] - sim.reacher_arm.pos = np.random.randint(0, 32 - sim.reacher_arm.size, size=3) - sim.reacher_arm.hold_block = False - sim.move_block.pos = np.random.randint(0, 32 - sim.move_block.size, size=3) - sim.move_block.pos[2] = 0 - sim.other_block.pos = np.random.randint(0, 32 - sim.move_block.size, size=3) - sim.other_block.pos[2] = 0 - if contains(sim.other_block, sim.move_block): - sim.move_block.pos[2] = sim.other_block.size + sim.reacher_arm.pos = np.random.randint(sim._range_min, sim._range_max, size=2) + sim.reacher_arm.block = None + + sim.goal.pos = np.random.randint(sim._range_min, sim._range_max, size=3) + sim.goal.pos[2] = 0 + for block in sim.move_block: + block.pos = np.random.randint(sim._range_min, sim._range_max, size=3) + block.pos[2] = 0 + if contains(sim.goal, block): + block.pos[2] = sim.goal.size + def get_environment(obsType='StateIm', max_episode_length=100, initPrms={}, obsPrms={}, rewPrms={}, actPrms={}): sim = SimpleReacherSimulator() diff --git a/envs/stacker_agent.py b/envs/stacker_agent.py index d97c84f..d8b81ce 100644 --- a/envs/stacker_agent.py +++ b/envs/stacker_agent.py @@ -4,9 +4,11 @@ from pyhelper_fns import vis_utils def contains(obj1, obj2): - x1, y1, _ = obj1.pos.tolist() - x2, y2, _ = obj2.pos.tolist() + x1, y1 = obj1.pos[0], obj1.pos[1] + x2, y2 = obj2.pos[0], obj2.pos[1] s1, s2 = obj1.size, obj2.size + x1, y1 = x1 - s1, y1 - s1 + x2, y2 = x2 - s2, y2 - s2 if x2 + s2 <= x1 or x1 + s1 <= x2: return False if y2 + s2 <= y1 or y1 + s1 <= y2: @@ -14,7 +16,6 @@ def contains(obj1, obj2): return True class StackedBox(): - def __init__(self, pos, size=2): # position is center of the box self.pos = pos.reshape((3,)) @@ -48,8 +49,8 @@ def step(self, pos): def _plot_object(self, coords, color='r'): x, y = coords - mnx, mxx = int(x), int(min(self._imSz, x + self.move_block.size)) - mny, mxy = int(y), int(min(self._imSz, y + self.move_block.size)) + mnx, mxx = max(x - self.move_block.size, 0), min(self._imSz, x + self.move_block.size) + mny, mxy = max(y - self.move_block.size, 0), min(self._imSz, y + self.move_block.size) if color == 'r': self._im[mny:mxy, mnx:mxx, 0] = 255 elif color == 'g': From 531715eaa857c9e6fa2bec8bde979d778fd51e57 Mon Sep 17 00:00:00 2001 From: JasmineDeng Date: Fri, 21 Apr 2017 17:32:20 -0700 Subject: [PATCH 09/10] working training for 1 and 2 blocks --- core/gym_wrapper.py | 3 +-- envs/reacher_agent.py | 30 ++++++++++++------------------ 2 files changed, 13 insertions(+), 20 deletions(-) diff --git a/core/gym_wrapper.py b/core/gym_wrapper.py index 3a08bf5..acb490d 100644 --- a/core/gym_wrapper.py +++ b/core/gym_wrapper.py @@ -62,8 +62,7 @@ def _step(self, action): self.env.step(action) obs = self._observation() reward = self.env.reward() - # done = self.env.simulator.done # test? - done = False + done = self.env.simulator.done # test? return obs, reward, done, dict(reward=reward) diff --git a/envs/reacher_agent.py b/envs/reacher_agent.py index 04d9f69..1e958b7 100644 --- a/envs/reacher_agent.py +++ b/envs/reacher_agent.py @@ -30,31 +30,30 @@ def __init__(self, pos, size=2): self.block = None class SimpleReacherSimulator(BaseSimulator): - def __init__(self, **kwargs): + def __init__(self, num_blocks=2, **kwargs): # should always have same size blocks (width = height = length = n) super(SimpleReacherSimulator, self).__init__(**kwargs) self._imSz = 32 self.width = self._imSz self.height = self._imSz - self.move_block = [StackedBox(np.zeros(3)), StackedBox(np.zeros(3))] + self.move_block = [StackedBox(np.zeros(3)) for i in range(num_blocks)] self.goal = StackedBox(np.zeros(3)) self.reacher_arm = ReacherArm(np.zeros(2)) self._im = np.zeros((self._imSz, self._imSz, 3), dtype=np.uint8) self._range_min = self.reacher_arm.size - self._range_max = 32 - self.reacher_arm.size + self._range_max = self._imSz - self.reacher_arm.size @property def done(self): for block in self.move_block: - if block.pos[2] != self.goal.size: + if not contains(block, self.goal): return False return True @overrides def step(self, action): action = action.reshape((2,)) - new_pos = self.reacher_arm.pos + action on_goal = contains(self.reacher_arm.block, self.goal) on_block = None @@ -70,14 +69,13 @@ def step(self, action): self.reacher_arm.block.pos[2] = self.goal.size self.reacher_arm.block = None + self.reacher_arm.pos = np.clip(self.reacher_arm.pos + action, self._range_min, self._range_max) + if self.reacher_arm.block is not None: block = self.reacher_arm.block - delta = new_pos[0] - self.reacher_arm.pos[0], new_pos[1] - self.reacher_arm.pos[1] - block.pos = np.array([block.pos[0] + delta[0], - block.pos[1] + delta[1], 32]) # to preserve relative position - block.pos = np.clip(block.pos, self._range_min, self._range_max) - - self.reacher_arm.pos = np.clip(new_pos, self._range_min, self._range_max) + action = np.append(action, 0) + block.pos = np.clip(block.pos + action, self._range_min, self._range_max) + block.pos[2] = self._imSz def _plot_object(self, coords, box, color='r'): x, y = coords @@ -139,7 +137,7 @@ def ndim(self): def scale(self, obs): obs = np.copy(obs) - obs = obs / 16 - 1 + obs = obs / float(self.simulator._imSz) * 2 - 1 return obs @overrides @@ -164,11 +162,8 @@ def contains_block(self): if hasattr(self.prms['sim'], 'move_block') and hasattr(self.prms['sim'], 'goal'): move_block = self.prms['sim'].move_block goal = self.prms['sim'].goal - reward = 0 - for block in move_block: - if block.pos[2] == goal.size: - reward += 1 - return reward / float(len(move_block)) + reward = 1 if all([contains(block, goal) for block in move_block]) else 0 + return reward return 0 @overrides @@ -206,7 +201,6 @@ def sample_env_init(self): if contains(sim.goal, block): block.pos[2] = sim.goal.size - def get_environment(obsType='StateIm', max_episode_length=100, initPrms={}, obsPrms={}, rewPrms={}, actPrms={}): sim = SimpleReacherSimulator() initPrms = {'sim' : sim} From 5a167850b0e69021b26cc5cf5d9da49c0df1e736 Mon Sep 17 00:00:00 2001 From: JasmineDeng Date: Fri, 21 Apr 2017 17:33:20 -0700 Subject: [PATCH 10/10] modified to use if height instead of contains for checking when terminal --- envs/reacher_agent.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/envs/reacher_agent.py b/envs/reacher_agent.py index 1e958b7..26da313 100644 --- a/envs/reacher_agent.py +++ b/envs/reacher_agent.py @@ -47,7 +47,7 @@ def __init__(self, num_blocks=2, **kwargs): @property def done(self): for block in self.move_block: - if not contains(block, self.goal): + if block.pos[2] != self.goal.size: return False return True @@ -162,7 +162,7 @@ def contains_block(self): if hasattr(self.prms['sim'], 'move_block') and hasattr(self.prms['sim'], 'goal'): move_block = self.prms['sim'].move_block goal = self.prms['sim'].goal - reward = 1 if all([contains(block, goal) for block in move_block]) else 0 + reward = 1 if all([block.pos[2] == goal.size for block in move_block]) else 0 return reward return 0