From 44ed4e95b69e60791342356d55fa90ec2f1ce7fa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Wed, 21 May 2025 10:49:38 +0200
Subject: [PATCH 1/2] feat: improve pick cube reward

Implements a distance-based reward for the pick-cube task, inspired by the ManiSkill reward function.
---
 python/rcs/envs/sim.py | 47 ++++++++++++++++++++++++++++++++++--------
 1 file changed, 38 insertions(+), 9 deletions(-)

diff --git a/python/rcs/envs/sim.py b/python/rcs/envs/sim.py
index 02e64b7d..cc6267ad 100644
--- a/python/rcs/envs/sim.py
+++ b/python/rcs/envs/sim.py
@@ -199,6 +199,10 @@ def env_from_xml_paths(
 class RandomCubePos(SimWrapper):
     """Wrapper to randomly place cube in the lab environments."""
 
+    def __init__(self, env: gym.Env, include_rotation: bool = False):
+        super().__init__(env)
+        self.include_rotation = include_rotation
+
     def reset(
         self, seed: int | None = None, options: dict[str, Any] | None = None
     ) -> tuple[dict[str, Any], dict[str, Any]]:
@@ -212,7 +216,10 @@ def reset(
         pos_x = iso_cube[0] + np.random.random() * 0.2 - 0.1
         pos_y = iso_cube[1] + np.random.random() * 0.2 - 0.1
 
-        self.sim.data.joint("box_joint").qpos[:3] = [pos_x, pos_y, pos_z]
+        if self.include_rotation:
+            self.sim.data.joint("box-joint").qpos = [pos_x, pos_y, pos_z, 2 * np.random.random() - 1, 0, 0, 1]
+        else:
+            self.sim.data.joint("box-joint").qpos = [pos_x, pos_y, pos_z, 0, 0, 0, 1]
 
         return obs, info
 
@@ -229,17 +236,39 @@ def __init__(self, env):
         self.sim = env.get_wrapper_attr("sim")
 
     def step(self, action: dict[str, Any]):
-        obs, reward, done, truncated, info = super().step(action)
+        obs, reward, _, truncated, info = super().step(action)
 
         success = (
-            self.sim.data.joint("box_joint").qpos[2] > 0.3 and obs["gripper"] == GripperWrapper.BINARY_GRIPPER_CLOSED
+            self.sim.data.joint("box-joint").qpos[2] > 0.15 + 0.852
+            and obs["gripper"] == GripperWrapper.BINARY_GRIPPER_CLOSED
         )
-        diff_ee_cube = np.linalg.norm(
-            self.sim.data.joint("box_joint").qpos[:3] - self.unwrapped.robot.get_cartesian_position().translation()
-        )
-        diff_cube_home = np.linalg.norm(self.sim.data.joint("box_joint").qpos[:3] - self.EE_HOME)
-        reward = -diff_cube_home - diff_ee_cube
-
+        info["success"] = success
+        if success:
+            reward = 5
+        else:
+            tcp_to_obj_dist = np.linalg.norm(
+                self.sim.data.joint("box-joint").qpos[:3] - self.unwrapped.robot.get_cartesian_position().translation()
+            )
+            obj_to_goal_dist = np.linalg.norm(self.sim.data.joint("box-joint").qpos[:3] - self.EE_HOME)
+
+            # old reward
+            # reward = -obj_to_goal_dist - tcp_to_obj_dist
+
+            # Maniskill grasp reward
+            reaching_reward = 1 - np.tanh(5 * tcp_to_obj_dist)
+            reward = reaching_reward
+            is_grasped = info["is_grasped"]
+            reward += is_grasped
+            place_reward = 1 - np.tanh(5 * obj_to_goal_dist)
+            reward += place_reward * is_grasped
+
+            # velocities are currently always zero after a step
+            # qvel = self.agent.robot.get_qvel()
+            # static_reward = 1 - np.tanh(5 * np.linalg.norm(qvel, axis=1))
+            # reward += static_reward * info["is_obj_placed"]
+
+        # normalize
+        reward /= 5
         return obs, reward, success, truncated, info
 
 

From 0bc3cb0a5bdfcd0ec1d5aa9440813f474caefb47 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Wed, 21 May 2025 11:25:58 +0200
Subject: [PATCH 2/2] style: fix type checker

---
 python/rcs/envs/sim.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/rcs/envs/sim.py b/python/rcs/envs/sim.py
index cc6267ad..932076b5 100644
--- a/python/rcs/envs/sim.py
+++ b/python/rcs/envs/sim.py
@@ -199,8 +199,8 @@ def env_from_xml_paths(
 class RandomCubePos(SimWrapper):
     """Wrapper to randomly place cube in the lab environments."""
 
-    def __init__(self, env: gym.Env, include_rotation: bool = False):
-        super().__init__(env)
+    def __init__(self, env: gym.Env, simulation: sim.Sim, include_rotation: bool = False):
+        super().__init__(env, simulation)
         self.include_rotation = include_rotation
 
     def reset(
@@ -268,7 +268,7 @@ def step(self, action: dict[str, Any]):
             # reward += static_reward * info["is_obj_placed"]
 
         # normalize
-        reward /= 5
+        reward /= 5  # type: ignore
         return obs, reward, success, truncated, info