
Commit a4673bb

Merge pull request #185 from utn-mi/juelg/improved-reward
feat: improve pick cube reward
2 parents: 428c3f2 + 0bc3cb0 · commit a4673bb

File tree

1 file changed (+38, -9)

python/rcs/envs/sim.py

Lines changed: 38 additions & 9 deletions
@@ -199,6 +199,10 @@ def env_from_xml_paths(
 class RandomCubePos(SimWrapper):
     """Wrapper to randomly place cube in the lab environments."""
 
+    def __init__(self, env: gym.Env, simulation: sim.Sim, include_rotation: bool = False):
+        super().__init__(env, simulation)
+        self.include_rotation = include_rotation
+
     def reset(
         self, seed: int | None = None, options: dict[str, Any] | None = None
     ) -> tuple[dict[str, Any], dict[str, Any]]:
@@ -212,7 +216,10 @@ def reset(
         pos_x = iso_cube[0] + np.random.random() * 0.2 - 0.1
         pos_y = iso_cube[1] + np.random.random() * 0.2 - 0.1
 
-        self.sim.data.joint("box_joint").qpos[:3] = [pos_x, pos_y, pos_z]
+        if self.include_rotation:
+            self.sim.data.joint("box-joint").qpos = [pos_x, pos_y, pos_z, 2 * np.random.random() - 1, 0, 0, 1]
+        else:
+            self.sim.data.joint("box-joint").qpos = [pos_x, pos_y, pos_z, 0, 0, 0, 1]
 
         return obs, info
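
Note on the rotation change above: a MuJoCo free joint's qpos is laid out as [x, y, z, qw, qx, qy, qz], and MuJoCo renormalizes the quaternion during stepping, so setting the scalar part to a random value in [-1, 1] while keeping qz = 1 yields a randomized rotation of the cube about its vertical axis. A minimal standalone sketch of that placement; the position values are hypothetical and not taken from the repository:

import numpy as np

# Free-joint qpos layout in MuJoCo: [x, y, z, qw, qx, qy, qz].
pos = np.array([0.4, 0.0, 0.852])        # hypothetical cube position
w = 2 * np.random.random() - 1           # random scalar part, as in the diff
quat = np.array([w, 0.0, 0.0, 1.0])      # rotation axis fixed to z
quat /= np.linalg.norm(quat)             # MuJoCo would normalize this itself
qpos = np.concatenate([pos, quat])       # seven numbers written to the free joint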

@@ -229,17 +236,39 @@ def __init__(self, env):
         self.sim = env.get_wrapper_attr("sim")
 
     def step(self, action: dict[str, Any]):
-        obs, reward, done, truncated, info = super().step(action)
+        obs, reward, _, truncated, info = super().step(action)
 
         success = (
-            self.sim.data.joint("box_joint").qpos[2] > 0.3 and obs["gripper"] == GripperWrapper.BINARY_GRIPPER_CLOSED
+            self.sim.data.joint("box-joint").qpos[2] > 0.15 + 0.852
+            and obs["gripper"] == GripperWrapper.BINARY_GRIPPER_CLOSED
         )
-        diff_ee_cube = np.linalg.norm(
-            self.sim.data.joint("box_joint").qpos[:3] - self.unwrapped.robot.get_cartesian_position().translation()
-        )
-        diff_cube_home = np.linalg.norm(self.sim.data.joint("box_joint").qpos[:3] - self.EE_HOME)
-        reward = -diff_cube_home - diff_ee_cube
-
+        info["success"] = success
+        if success:
+            reward = 5
+        else:
+            tcp_to_obj_dist = np.linalg.norm(
+                self.sim.data.joint("box-joint").qpos[:3] - self.unwrapped.robot.get_cartesian_position().translation()
+            )
+            obj_to_goal_dist = np.linalg.norm(self.sim.data.joint("box-joint").qpos[:3] - self.EE_HOME)
+
+            # old reward
+            # reward = -obj_to_goal_dist - tcp_to_obj_dist
+
+            # Maniskill grasp reward
+            reaching_reward = 1 - np.tanh(5 * tcp_to_obj_dist)
+            reward = reaching_reward
+            is_grasped = info["is_grasped"]
+            reward += is_grasped
+            place_reward = 1 - np.tanh(5 * obj_to_goal_dist)
+            reward += place_reward * is_grasped
+
+            # velocities are currently always zero after a step
+            # qvel = self.agent.robot.get_qvel()
+            # static_reward = 1 - np.tanh(5 * np.linalg.norm(qvel, axis=1))
+            # reward += static_reward * info["is_obj_placed"]
+
+        # normalize
+        reward /= 5  # type: ignore
         return obs, reward, success, truncated, info
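
The new shaping added above mirrors ManiSkill's staged pick-and-place reward: a tanh-shaped reaching term, a flat bonus while the cube is grasped, and a tanh-shaped placing term that only counts while grasped, with success paying out the maximum before normalization. A minimal standalone sketch of the same staging; the function name and arguments here are illustrative and not part of the repository API:

import numpy as np

def staged_pick_reward(tcp_to_obj_dist: float, obj_to_goal_dist: float,
                       is_grasped: bool, success: bool) -> float:
    # Sketch of the staged shaping used in the diff above.
    if success:
        reward = 5.0                                   # success dominates every shaped term
    else:
        reward = 1 - np.tanh(5 * tcp_to_obj_dist)      # reaching term, approaches 1 at the cube
        reward += float(is_grasped)                    # flat bonus while grasped
        reward += (1 - np.tanh(5 * obj_to_goal_dist)) * float(is_grasped)  # placing term, gated on grasp
    return float(reward / 5)                           # normalize so the maximum is 1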
