Skip to content
10 changes: 6 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,9 @@ An environment is defined by:
- ActionProcessor: an object of type `BaseAction` that defines functions that process the action. Generally, the simulator works
by applying continuous forces/torques; however, the desired action space might be discrete or continuous.
`BaseAction` objects allow the user to easily switch between different action spaces.

### Interactive Mode

To run environments in interactive mode, a function mapping the keyboard inputs into commands supplied to the agent must be defined. This function is typically called `str2action` in the `rlmaster` repository. The environment can be run in interactive mode in the following way:

```
Expand All @@ -31,7 +32,9 @@ env.interactive(move_single_env.str2action)
```
You can use the commands `w`, `s`, `d`, `a` to move the agent and `q` to quit the interactive mode.


### Cheetah in Interactive Mode

No interface has been developed yet, but after creating an instance of `SimpleStackerAgent` from `stacker_agent`, calling `_setup_renderer` and `render` will open a display window. You can step by entering an xy position that will move the second block there instantly. Calling `render` again will show the updated positions of the blocks. They are initialized at position (0,0) if not called with the initializer.

## Environment in OpenAI gym format

Expand All @@ -41,8 +44,7 @@ from core import gym_wrapper
env = move_agent.get_environment()
gymEnv = gym_wrapper.GymWrapper(env)
```






20 changes: 11 additions & 9 deletions core/gym_wrapper.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,10 @@
import numpy as np
from core import base_environment
import rlmaster.core.base_environment as base_environment
from gym import spaces
from rllab.spaces.box import Box
from rllab.core.serializable import Serializable
from rllab.misc.overrides import overrides
from rllab.misc import logger

class GymWrapper(object):
def __init__(self, env):
Expand All @@ -11,16 +15,15 @@ def __init__(self, env):
else:
assert isinstance(self.env.action_processor,
base_environment.BaseContinuousAction)
self.action_space = spaces.Box(
self.action_space = Box(
low = env.action_processor.minval(),
high = env.action_processor.maxval(),
shape=(env.action_processor.action_dim(),
env.action_processor.num_actions()))
shape=(env.action_processor.action_dim()))
obsNdim = self.env.observation_ndim()
obsKeys = obsNdim.keys()
obsKeys = list(obsNdim.keys())
assert len(obsKeys) == 1, 'gym only supports one observation type'
self._obsKey = obsKeys[0]
self.observation_space = spaces.Box(low=0, high=255, shape=obsNdim[obsKeys[0]])
self.observation_space = Box(low=0, high=255, shape=obsNdim[obsKeys[0]])

@property
def frameskip(self):
Expand Down Expand Up @@ -59,7 +62,7 @@ def _step(self, action):
self.env.step(action)
obs = self._observation()
reward = self.env.reward()
done = False
done = self.env.simulator.done # test?
return obs, reward, done, dict(reward=reward)


Expand All @@ -68,7 +71,7 @@ def step(self, action):


def viewer_setup(self):
self.env._renderer_setup()
self.env.setup_renderer()


def render(self):
Expand All @@ -77,4 +80,3 @@ def render(self):

def _render(self):
return self.env.render()

135 changes: 135 additions & 0 deletions envs/cheetah_agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
from rlmaster.core.base_environment import *
import numpy as np
from overrides import overrides
from rllab.mujoco_py import MjModel, MjViewer

class HalfCheetahSimulator(BaseSimulator):
    """Mujoco-backed simulator for the half-cheetah model.

    Loads ``half_cheetah.xml`` and exposes the BaseSimulator interface:
    ``step`` applies actuator controls, ``get_image``/``render`` read from
    an MjViewer created by ``_setup_renderer``.
    """

    def __init__(self, **kwargs):
        super(HalfCheetahSimulator, self).__init__(**kwargs)

        # Square RGB render buffer; also used as the viewer window size.
        self._imSz = 512
        self._im = np.zeros((self._imSz, self._imSz, 3), dtype=np.uint8)

        self.model = MjModel('../rlmaster/envs/mujoco_envs/xmls/half_cheetah.xml')
        self.viewer = None
        self._pos = {}
        self._pos['torso'] = np.zeros((3,))
        self._range_min = -1
        self._range_max = 1

        self.body_comvel = 0
        self.action = np.zeros((1, 2))
        # Snapshot of the initial mujoco state; InitCheetah restores these
        # (with noise) on reset.
        self.init_qpos = self.model.data.qpos
        self.init_qvel = self.model.data.qvel
        self.init_qacc = self.model.data.qacc
        self.init_ctrl = self.model.data.ctrl
        self.frame_skip = 1

    @overrides
    def step(self, ctrl, loop=False):
        """Apply controls for ``frame_skip`` physics steps and cache torso state."""
        # Clip every actuator command to [-1, 1]; scalar bounds replace the
        # hard-coded six-element arrays and work for any control length.
        ctrl = np.clip(ctrl, -1.0, 1.0)
        self.model.data.ctrl = ctrl
        for _ in range(self.frame_skip):
            self.model.step()
        self.model.forward()
        ind = self.model.body_names.index('torso')
        self._pos['torso'] = self.model.body_pos[ind]
        self.body_comvel = self.model.body_comvels[ind]
        self.action = ctrl

    @overrides
    def get_image(self):
        """Return a copy of the current viewer frame as an HxWx3 uint8 array."""
        data, width, height = self.viewer.get_image()
        # np.frombuffer replaces the deprecated np.fromstring; rows are
        # flipped because the GL buffer is bottom-up.
        self._im = np.frombuffer(data, dtype='uint8').reshape(height, width, 3)[::-1, :, :]
        return self._im.copy()

    @overrides
    def _setup_renderer(self):
        # Must be called before get_image()/render(); creates the GL window.
        self.viewer = MjViewer(visible=True, init_width=self._imSz, init_height=self._imSz)
        self.viewer.start()
        self.viewer.set_model(self.model)

    @overrides
    def render(self):
        self.viewer.loop_once()

class CheetahIm(BaseObservation):
    """State observation for the half-cheetah, packed under the 'im' key."""

    def get_body_com(self, body_name):
        # Center of mass of the named body, read from the subtree COM table.
        body_index = self.simulator.model.body_names.index(body_name)
        return self.simulator.model.data.com_subtree[body_index]

    @overrides
    def ndim(self):
        # Single observation stream named 'im' holding 20 values.
        return {'im': (1, 20)}

    @overrides
    def observation(self):
        # Joint positions (dropping the root x), joint velocities, and the
        # torso center of mass, concatenated into one flat vector.
        model_data = self.simulator.model.data
        state = np.concatenate([
            model_data.qpos.flatten()[1:],
            model_data.qvel.flat,
            self.get_body_com('torso').flat,
        ])
        return {'im': state}

class RewardCheetah(BaseRewarder):
    """Reward for the half-cheetah: forward COM velocity minus a control cost."""

    @property
    def action(self):
        # Last control applied by the simulator; zero action before the
        # first step.
        sim = self.prms['sim']
        return sim.action if hasattr(sim, 'action') else np.zeros((1, 2))

    @property
    def body_comvel(self):
        # Torso center-of-mass velocity. The fallback must be indexable:
        # get() reads component [0], so the previous scalar fallback of 0
        # raised TypeError whenever the simulator lacked the attribute.
        sim = self.prms['sim']
        return sim.body_comvel if hasattr(sim, 'body_comvel') else np.zeros((3,))

    @overrides
    def get(self):
        """Return -(control cost + run cost): penalize effort, reward x-velocity."""
        ctrl_cost = 1e-1 * 0.5 * np.sum(np.square(self.action))
        run_cost = -1 * self.body_comvel[0]
        return -(ctrl_cost + run_cost)

class ContinuousCheetahAction(BaseContinuousAction):
    """Continuous action space for the half-cheetah's six actuators."""

    # Actuator count and control bounds reported to wrappers.
    _NUM_ACTUATORS = 6
    _MINVAL = -1.25
    _MAXVAL = 1.1

    @overrides
    def action_dim(self):
        return self._NUM_ACTUATORS

    def minval(self):
        return self._MINVAL

    def maxval(self):
        return self._MAXVAL

    @overrides
    def process(self, action):
        # Controls pass straight through to the simulator unchanged.
        return action

# TODO(jasmine): fix this so it initializes (is it called at the start of each epoch? find out)
class InitCheetah(BaseInitializer):
    """Initializer that resets the half-cheetah to a noisy copy of its start state."""

    def reset_mujoco(self):
        # Restore the state snapshot taken in HalfCheetahSimulator.__init__,
        # adding Gaussian noise to positions (sd 0.01) and velocities (sd 0.1);
        # accelerations and controls are restored exactly.
        self.simulator.model.data.qpos = self.simulator.init_qpos + 0.01 * np.random.normal(size=self.simulator.init_qpos.shape)
        self.simulator.model.data.qvel = self.simulator.init_qvel + 0.1 * np.random.normal(size=self.simulator.init_qvel.shape)
        self.simulator.model.data.qacc = self.simulator.init_qacc
        self.simulator.model.data.ctrl = self.simulator.init_ctrl

    @overrides
    def sample_env_init(self):
        # Reset, then forward the model so derived quantities are consistent
        # with the freshly written qpos/qvel.
        self.reset_mujoco()
        self.simulator.model.forward()

def get_environment(max_episode_length=100, initPrms=None, obsPrms=None, actPrms=None):
    """Build a half-cheetah BaseEnvironment.

    Args:
        max_episode_length: episode length cap stored in the environment params.
        initPrms: optional parameter dict for the initializer (default: empty).
        obsPrms: optional parameter dict for the observer (default: empty).
        actPrms: optional parameter dict for the action processor (default: empty).

    Returns:
        A BaseEnvironment wiring together the simulator, initializer,
        observation, rewarder, and action processor.
    """
    # None sentinels avoid mutable default arguments: a shared {} default
    # would leak state across calls if any component mutated its params.
    initPrms = {} if initPrms is None else initPrms
    obsPrms = {} if obsPrms is None else obsPrms
    actPrms = {} if actPrms is None else actPrms
    sim = HalfCheetahSimulator()
    initObj = InitCheetah(sim, initPrms)
    obsObj = CheetahIm(sim, obsPrms)
    rewObj = RewardCheetah(sim, {'sim': sim})
    actObj = ContinuousCheetahAction(actPrms)
    env = BaseEnvironment(sim, initObj, obsObj, rewObj, actObj,
                          params={'max_episode_length': max_episode_length})
    return env
31 changes: 17 additions & 14 deletions envs/move_agent.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from core.base_environment import *
from rlmaster.core.base_environment import *
import numpy as np
from overrides import overrides
from rllab.mujoco_py import MjModel

from pyhelper_fns import vis_utils

def str2action(cmd):
Expand Down Expand Up @@ -54,7 +56,6 @@ def process(self, action):
raise Exception('Action %s not recognized' % action)
ctrl = np.array(ctrl).reshape((2,))
return ctrl


class ContinuousAction(BaseContinuousAction):
@overrides
Expand All @@ -65,6 +66,11 @@ def action_dim(self):
def process(self, action):
return action

def minval(self):
return -1

def maxval(self):
return 1

class MoveTeleportSimulator(BaseSimulator):
def __init__(self, **kwargs):
Expand All @@ -79,7 +85,7 @@ def __init__(self, **kwargs):
#Manipulate radius
self._manipulate_radius = 0.2
#Image size
self._imSz = 64
self._imSz = 32
self._im = np.zeros((self._imSz, self._imSz, 3), dtype=np.uint8)

def object_names(self):
Expand Down Expand Up @@ -148,7 +154,6 @@ def _setup_renderer(self):
def render(self):
self._canvas._display(self.get_image())


class InitFixed(BaseInitializer):
@overrides
def sample_env_init(self):
Expand All @@ -165,7 +170,6 @@ def sample_env_init(self):
self.simulator._pos[k] = range_mag * self.random.rand(2,) + \
self.simulator._range_min


class ObsState(BaseObservation):
@overrides
def ndim(self):
Expand All @@ -180,22 +184,21 @@ def observation(self):
for i, k in enumerate(self.simulator._pos.keys()):
obs[2*i, 2*i + 2] = self.simulator._pos[k].copy()
return obs


class ObsIm(BaseObservation):
@overrides
def ndim(self):
dim = {}
dim['im'] = (self.simulator._imSz, self.simulator._imSz, 3)
# dim['im'] = (self.simulator._imSz, self.simulator._imSz, 3)
dim['im'] = (3072, 1)
return dim

@overrides
def observation(self):
obs = {}
obs['im'] = self.simulator.get_image()
obs['im'] = self.simulator.get_image().flatten()
return obs


class RewardSimple(BaseRewarder):
#The radius around the goal in which reward is provided to the agent.
@property
Expand All @@ -208,15 +211,15 @@ def get(self):
return 1
else:
return 0


def get_environment(initName='InitRandom', obsName='ObsIm', rewName='RewardSimple',
actType='DiscreteActionFour', max_episode_length=100,
initPrms={}, obsPrms={}, rewPrms={}, actPrms={}):

sim = MoveTeleportSimulator()
actType='DiscreteActionFour', simName='MoveTeleportSimulator',
max_episode_length=100, initPrms={}, obsPrms={}, rewPrms={},
actPrms={}):
sim = globals()[simName]()
initObj = globals()[initName](sim, initPrms)
obsObj = globals()[obsName](sim, obsPrms)
rewPrms = { 'sim': sim }
rewObj = globals()[rewName](sim, rewPrms)
actObj = globals()[actType](actPrms)
env = BaseEnvironment(sim, initObj, obsObj, rewObj, actObj,
Expand Down
Loading