From e20655ebcc51356ed4d51d5818970ac9684f6ec0 Mon Sep 17 00:00:00 2001 From: SsnL Date: Sat, 19 Mar 2022 14:24:10 -0400 Subject: [PATCH] support .seed use a wrapper and fix old API --- robodesk/__init__.py | 2 ++ robodesk/robodesk.py | 40 +++++++++++++++++++++++++++++++--------- 2 files changed, 33 insertions(+), 9 deletions(-) diff --git a/robodesk/__init__.py b/robodesk/__init__.py index cecab33..bd90a05 100644 --- a/robodesk/__init__.py +++ b/robodesk/__init__.py @@ -1 +1,3 @@ from .robodesk import RoboDesk + +__all__ = ['RoboDesk'] diff --git a/robodesk/robodesk.py b/robodesk/robodesk.py index 7c95c42..44c055a 100644 --- a/robodesk/robodesk.py +++ b/robodesk/robodesk.py @@ -13,6 +13,24 @@ from PIL import Image +class NumPyRNGWrapper(object): + def __init__(self): + self.seed(None) + + def seed(self, seed=None): + # seed: Union[int, np.random.SeedSequence, None] + if hasattr(np.random, 'Generator'): + self.rng = np.random.Generator(np.random.PCG64(seed)) + else: + self.rng = np.random.RandomState(seed=seed) + + def uniform(self, low=0.0, high=1.0, size=None): + return self.rng.uniform(low=low, high=high, size=size) + + def random(self): + return self.rng.random() + + class RoboDesk(gym.Env): """Multi-task manipulation environment.""" @@ -93,6 +111,10 @@ def __init__(self, task='open_slide', reward='dense', action_repeat=1, self.all_tasks = list(self.reward_functions) self.task = task # pylint: enable=g-long-lambda + self.np_rng = NumPyRNGWrapper() + + def seed(self, seed=None): + self.np_rng.seed(seed) @property def action_space(self): @@ -199,7 +221,7 @@ def step(self, action): def _get_init_robot_pos(self): init_joint_pose = np.array( [-0.30, -0.4, 0.28, -2.5, 0.13, 1.87, 0.91, 0.01, 0.01]) - init_joint_pose += 0.15 * np.random.uniform( + init_joint_pose += 0.15 * self.np_rng.uniform( low=self.physics.model.actuator_ctrlrange[:self.num_joints, 0], high=self.physics.model.actuator_ctrlrange[:self.num_joints, 1]) return init_joint_pose @@ -212,17 +234,17 @@ def reset(self): self.physics.reset() # Randomize object positions. - self.physics.named.data.qpos['drawer_joint'] -= 0.10 * np.random.random() - self.physics.named.data.qpos['slide_joint'] += 0.20 * np.random.random() + self.physics.named.data.qpos['drawer_joint'] -= 0.10 * self.np_rng.random() + self.physics.named.data.qpos['slide_joint'] += 0.20 * self.np_rng.random() - self.physics.named.data.qpos['flat_block'][0] += 0.3 * np.random.random() - self.physics.named.data.qpos['flat_block'][1] += 0.07 * np.random.random() - self.physics.named.data.qpos['ball'][0] += 0.48 * np.random.random() - self.physics.named.data.qpos['ball'][1] += 0.08 * np.random.random() + self.physics.named.data.qpos['flat_block'][0] += 0.3 * self.np_rng.random() + self.physics.named.data.qpos['flat_block'][1] += 0.07 * self.np_rng.random() + self.physics.named.data.qpos['ball'][0] += 0.48 * self.np_rng.random() + self.physics.named.data.qpos['ball'][1] += 0.08 * self.np_rng.random() self.physics.named.data.qpos['upright_block'][0] += ( - 0.3 * np.random.random() + 0.05) + 0.3 * self.np_rng.random() + 0.05) self.physics.named.data.qpos['upright_block'][1] += ( - 0.05 * np.random.random()) + 0.05 * self.np_rng.random()) # Set robot position. self.physics.data.qpos[:self.num_joints] = self._get_init_robot_pos()