@@ -199,6 +199,10 @@ def env_from_xml_paths(
 class RandomCubePos(SimWrapper):
     """Wrapper to randomly place cube in the lab environments."""
 
+    def __init__(self, env: gym.Env, simulation: sim.Sim, include_rotation: bool = False):
+        super().__init__(env, simulation)
+        self.include_rotation = include_rotation
+
     def reset(
         self, seed: int | None = None, options: dict[str, Any] | None = None
     ) -> tuple[dict[str, Any], dict[str, Any]]:
@@ -212,7 +216,10 @@ def reset(
         pos_x = iso_cube[0] + np.random.random() * 0.2 - 0.1
         pos_y = iso_cube[1] + np.random.random() * 0.2 - 0.1
 
-        self.sim.data.joint("box_joint").qpos[:3] = [pos_x, pos_y, pos_z]
+        if self.include_rotation:
+            self.sim.data.joint("box-joint").qpos = [pos_x, pos_y, pos_z, 2 * np.random.random() - 1, 0, 0, 1]
+        else:
+            self.sim.data.joint("box-joint").qpos = [pos_x, pos_y, pos_z, 0, 0, 0, 1]
 
         return obs, info
 
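Note on the free-joint assignment added above: a MuJoCo free joint's qpos holds the 3-D position followed by an orientation quaternion in (w, x, y, z) order, so the wrapper now writes all seven values at once. A minimal sketch of sampling a uniformly distributed yaw instead of the raw random w component (placeholder coordinates, not code from this commit):

import numpy as np

# Illustrative cube position; the real values come from the iso_cube offsets above.
pos_x, pos_y, pos_z = 0.4, 0.0, 0.852

yaw = np.random.uniform(-np.pi, np.pi)               # rotation about the vertical axis
quat = [np.cos(yaw / 2), 0.0, 0.0, np.sin(yaw / 2)]  # unit quaternion in (w, x, y, z) order
qpos = [pos_x, pos_y, pos_z, *quat]                  # 7-element free-joint configuration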
@@ -229,17 +236,39 @@ def __init__(self, env):
         self.sim = env.get_wrapper_attr("sim")
 
     def step(self, action: dict[str, Any]):
-        obs, reward, done, truncated, info = super().step(action)
+        obs, reward, _, truncated, info = super().step(action)
 
         success = (
-            self.sim.data.joint("box_joint").qpos[2] > 0.3 and obs["gripper"] == GripperWrapper.BINARY_GRIPPER_CLOSED
+            self.sim.data.joint("box-joint").qpos[2] > 0.15 + 0.852
+            and obs["gripper"] == GripperWrapper.BINARY_GRIPPER_CLOSED
         )
-        diff_ee_cube = np.linalg.norm(
-            self.sim.data.joint("box_joint").qpos[:3] - self.unwrapped.robot.get_cartesian_position().translation()
-        )
-        diff_cube_home = np.linalg.norm(self.sim.data.joint("box_joint").qpos[:3] - self.EE_HOME)
-        reward = -diff_cube_home - diff_ee_cube
-
+        info["success"] = success
+        if success:
+            reward = 5
+        else:
+            tcp_to_obj_dist = np.linalg.norm(
+                self.sim.data.joint("box-joint").qpos[:3] - self.unwrapped.robot.get_cartesian_position().translation()
+            )
+            obj_to_goal_dist = np.linalg.norm(self.sim.data.joint("box-joint").qpos[:3] - self.EE_HOME)
+
+            # old reward
+            # reward = -obj_to_goal_dist - tcp_to_obj_dist
+
+            # ManiSkill-style grasp reward
+            reaching_reward = 1 - np.tanh(5 * tcp_to_obj_dist)
+            reward = reaching_reward
+            is_grasped = info["is_grasped"]
+            reward += is_grasped
+            place_reward = 1 - np.tanh(5 * obj_to_goal_dist)
+            reward += place_reward * is_grasped
+
+            # velocities are currently always zero after a step
+            # qvel = self.agent.robot.get_qvel()
+            # static_reward = 1 - np.tanh(5 * np.linalg.norm(qvel, axis=1))
+            # reward += static_reward * info["is_obj_placed"]
+
+            # normalize
+            reward /= 5  # type: ignore
         return obs, reward, success, truncated, info
 
 
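For intuition, the dense branch above stages a tanh-shaped reaching term, a +1 grasp bonus, and a placement term that only counts while the cube is grasped, then divides by 5. A minimal standalone restatement of that shaping (hypothetical helper name, not the PR's code path):

import numpy as np

def dense_pick_reward(tcp_to_obj_dist: float, obj_to_goal_dist: float, is_grasped: bool) -> float:
    reward = 1 - np.tanh(5 * tcp_to_obj_dist)                          # reaching: -> 1 as the TCP nears the cube
    reward += float(is_grasped)                                        # flat bonus once the cube is grasped
    reward += (1 - np.tanh(5 * obj_to_goal_dist)) * float(is_grasped)  # placing only rewarded while grasped
    return reward / 5                                                  # same normalization as in the wrapper

# Far from the cube and not grasped -> ~0; grasped and at the goal -> 0.6.
print(dense_pick_reward(0.5, 0.4, False), dense_pick_reward(0.0, 0.0, True))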