From 44ed4e95b69e60791342356d55fa90ec2f1ce7fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20J=C3=BClg?= Date: Wed, 21 May 2025 10:49:38 +0200 Subject: [PATCH 1/2] feat: improve pick cube reward Implements a distance-based reward for pick cube inspired by the ManiSkill reward function. --- python/rcs/envs/sim.py | 47 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 38 insertions(+), 9 deletions(-) diff --git a/python/rcs/envs/sim.py b/python/rcs/envs/sim.py index 02e64b7d..cc6267ad 100644 --- a/python/rcs/envs/sim.py +++ b/python/rcs/envs/sim.py @@ -199,6 +199,10 @@ def env_from_xml_paths( class RandomCubePos(SimWrapper): """Wrapper to randomly place cube in the lab environments.""" + def __init__(self, env: gym.Env, include_rotation: bool = False): + super().__init__(env) + self.include_rotation = include_rotation + def reset( self, seed: int | None = None, options: dict[str, Any] | None = None ) -> tuple[dict[str, Any], dict[str, Any]]: @@ -212,7 +216,10 @@ def reset( pos_x = iso_cube[0] + np.random.random() * 0.2 - 0.1 pos_y = iso_cube[1] + np.random.random() * 0.2 - 0.1 - self.sim.data.joint("box_joint").qpos[:3] = [pos_x, pos_y, pos_z] + if self.include_rotation: + self.sim.data.joint("box-joint").qpos = [pos_x, pos_y, pos_z, 2 * np.random.random() - 1, 0, 0, 1] + else: + self.sim.data.joint("box-joint").qpos = [pos_x, pos_y, pos_z, 0, 0, 0, 1] return obs, info @@ -229,17 +236,39 @@ def __init__(self, env): self.sim = env.get_wrapper_attr("sim") def step(self, action: dict[str, Any]): - obs, reward, done, truncated, info = super().step(action) + obs, reward, _, truncated, info = super().step(action) success = ( - self.sim.data.joint("box_joint").qpos[2] > 0.3 and obs["gripper"] == GripperWrapper.BINARY_GRIPPER_CLOSED + self.sim.data.joint("box-joint").qpos[2] > 0.15 + 0.852 + and obs["gripper"] == GripperWrapper.BINARY_GRIPPER_CLOSED ) - diff_ee_cube = np.linalg.norm( - self.sim.data.joint("box_joint").qpos[:3] - 
self.unwrapped.robot.get_cartesian_position().translation() - ) - diff_cube_home = np.linalg.norm(self.sim.data.joint("box_joint").qpos[:3] - self.EE_HOME) - reward = -diff_cube_home - diff_ee_cube - + info["success"] = success + if success: + reward = 5 + else: + tcp_to_obj_dist = np.linalg.norm( + self.sim.data.joint("box-joint").qpos[:3] - self.unwrapped.robot.get_cartesian_position().translation() + ) + obj_to_goal_dist = np.linalg.norm(self.sim.data.joint("box-joint").qpos[:3] - self.EE_HOME) + + # old reward + # reward = -obj_to_goal_dist - tcp_to_obj_dist + + # Maniskill grasp reward + reaching_reward = 1 - np.tanh(5 * tcp_to_obj_dist) + reward = reaching_reward + is_grasped = info["is_grasped"] + reward += is_grasped + place_reward = 1 - np.tanh(5 * obj_to_goal_dist) + reward += place_reward * is_grasped + + # velocities are currently always zero after a step + # qvel = self.agent.robot.get_qvel() + # static_reward = 1 - np.tanh(5 * np.linalg.norm(qvel, axis=1)) + # reward += static_reward * info["is_obj_placed"] + + # normalize + reward /= 5 return obs, reward, success, truncated, info From 0bc3cb0a5bdfcd0ec1d5aa9440813f474caefb47 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20J=C3=BClg?= Date: Wed, 21 May 2025 11:25:58 +0200 Subject: [PATCH 2/2] style: fix type checker --- python/rcs/envs/sim.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/rcs/envs/sim.py b/python/rcs/envs/sim.py index cc6267ad..932076b5 100644 --- a/python/rcs/envs/sim.py +++ b/python/rcs/envs/sim.py @@ -199,8 +199,8 @@ def env_from_xml_paths( class RandomCubePos(SimWrapper): """Wrapper to randomly place cube in the lab environments.""" - def __init__(self, env: gym.Env, include_rotation: bool = False): - super().__init__(env) + def __init__(self, env: gym.Env, simulation: sim.Sim, include_rotation: bool = False): + super().__init__(env, simulation) self.include_rotation = include_rotation def reset( @@ -268,7 +268,7 @@ def step(self, action: 
dict[str, Any]): # reward += static_reward * info["is_obj_placed"] # normalize - reward /= 5 + reward /= 5 # type: ignore return obs, reward, success, truncated, info