From 4a2a35047bc8a9860ba9b96abfba76b514a434f3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Fri, 29 Nov 2024 17:56:44 +0100
Subject: [PATCH 01/11] feat(gym env): register envs in gym

---
 python/rcsss/__init__.py       | 14 ++++++
 python/rcsss/envs/factories.py | 86 +++++++++++++++++++++++++++++++++-
 2 files changed, 99 insertions(+), 1 deletion(-)

diff --git a/python/rcsss/__init__.py b/python/rcsss/__init__.py
index 515972bc..67dcd663 100644
--- a/python/rcsss/__init__.py
+++ b/python/rcsss/__init__.py
@@ -3,11 +3,25 @@
 import pathlib
 import site
 
+from gymnasium import register
 from rcsss import camera, control, envs, sim
 from rcsss._core import __version__, common, hw
+from rcsss.envs.factories import FR3Real, FR3SimplePickUpSim
 
+# available mujoco scenes
 scenes = {
     path.stem: path / "scene.mjb" for path in (pathlib.Path(site.getsitepackages()[0]) / "rcsss" / "scenes").glob("*")
 }
 
+# make submodules available
 __all__ = ["__doc__", "__version__", "common", "hw", "sim", "camera", "scenes", "control", "envs"]
+
+# register gymnasium environments
+register(
+    id="rcs/SimplePickUpSim-v0",
+    entry_point=FR3SimplePickUpSim(),
+)
+register(
+    id="rcs/FR3Real-v0",
+    entry_point=FR3Real(),
+)
diff --git a/python/rcsss/envs/factories.py b/python/rcsss/envs/factories.py
index ccea92ee..31466606 100644
--- a/python/rcsss/envs/factories.py
+++ b/python/rcsss/envs/factories.py
@@ -2,6 +2,8 @@
 from os import PathLike
 
 import gymnasium as gym
+from gymnasium.envs import EnvCreator
+import numpy as np
 import rcsss
 from rcsss import sim
 from rcsss._core.hw import FR3Config, IKController
@@ -43,7 +45,9 @@ def default_fr3_hw_gripper_cfg():
     return gripper_cfg
 
 
-def default_realsense(name2id: dict[str, str]):
+def default_realsense(name2id: dict[str, str] | None) -> RealSenseCameraSet | None:
+    if name2id is None:
+        return None
     cameras = {name: BaseCameraConfig(identifier=id) for name, id in name2id.items()}
     cam_cfg = RealSenseSetConfig(
         cameras=cameras,
@@ -229,3 +233,83 @@ def fr3_sim_env(
         env = RelativeActionSpace(env, max_mov=max_relative_movement, relative_to=relative_to)
 
     return env
+
+
+class FR3Real(EnvCreator):
+    def __call__(
+        self,
+        robot_ip: str,
+        control_mode: str = "xyzrpy",
+        delta_actions: bool = True,
+        camera_config: dict[str, str] | None = None,
+        gripper: bool = True,
+    ) -> gym.Env:
+        camera_set = default_realsense(camera_config)
+        env_rel = fr3_hw_env(
+            ip=robot_ip,
+            camera_set=camera_set,
+            control_mode=(
+                ControlMode.CARTESIAN_TRPY
+                if control_mode == "xyzrpy"
+                else ControlMode.JOINTS if control_mode == "joints" else ControlMode.CARTESIAN_TQuart
+            ),
+            robot_cfg=default_fr3_hw_robot_cfg(),
+            collision_guard=None,
+            gripper_cfg=default_fr3_hw_gripper_cfg() if gripper else None,
+            max_relative_movement=(0.2, np.deg2rad(45)) if delta_actions else None,
+            relative_to=RelativeTo.LAST_STEP,
+        )
+
+        return env_rel
+
+
+class FR3SimplePickUpSim(EnvCreator):
+    def __call__(
+        self,
+        render_mode: str = "human",
+        control_mode: str = "xyzrpy",
+        resolution: tuple[int, int] | None = None,
+        frame_rate: int = 10,
+        delta_actions: bool = True,
+    ) -> gym.Env:
+        if resolution is None:
+            resolution = (256, 256)
+
+        cameras = {
+            "wrist": SimCameraConfig(identifier="eye-in-hand_0", type=int(CameraType.fixed)),
+            "bird_eye": SimCameraConfig(identifier="bird-eye-cam", type=int(CameraType.fixed)),
+            "side": SimCameraConfig(identifier="side_view", type=int(CameraType.fixed)),
+            "right_side": SimCameraConfig(identifier="right_side", type=int(CameraType.fixed)),
+            "left_side": SimCameraConfig(identifier="left_side", type=int(CameraType.fixed)),
+            "front": SimCameraConfig(identifier="front", type=int(CameraType.fixed)),
+        }
+
+        camera_cfg = SimCameraSetConfig(
+            cameras=cameras,
+            resolution_width=resolution[0],
+            resolution_height=resolution[1],
+            frame_rate=frame_rate,
+            physical_units=True,
+        )
+        env_rel = fr3_sim_env(
+            control_mode=(
+                ControlMode.CARTESIAN_TRPY
+                if control_mode == "xyzrpy"
+                else ControlMode.JOINTS if control_mode == "joints" else ControlMode.CARTESIAN_TQuart
+            ),
+            robot_cfg=default_fr3_sim_robot_cfg(),
+            collision_guard=False,
+            gripper_cfg=default_fr3_sim_gripper_cfg(),
+            camera_set_cfg=camera_cfg,
+            max_relative_movement=(0.2, np.deg2rad(45)) if delta_actions else None,
+            relative_to=RelativeTo.LAST_STEP,
+            mjcf="fr3_simple_pick_up",
+        )
+        if render_mode == "human":
+            env_rel.get_wrapper_attr("sim").open_gui()
+
+        # TODO:
+        # - randomize location of the yellow cube
+        # - wrapper for success/failure and reward
+
+        return env_rel

From 43b377041f543ef1dc378eb898afc0742f248824 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Tue, 3 Dec 2024 16:47:55 +0100
Subject: [PATCH 02/11] feat(sim): sim wrapper env

a sim wrapper env is an env that has access to the fr3sim wrapper (has mjdata and mjmodel attributes) and is between robot and fr3sim.
This means that its reset method will be called after the sim has been resetted which is useful when resetting objects.
---
 python/rcsss/envs/factories.py |  5 ++++-
 python/rcsss/envs/sim.py       | 15 +++++++++++++--
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/python/rcsss/envs/factories.py b/python/rcsss/envs/factories.py
index 31466606..090a74a6 100644
--- a/python/rcsss/envs/factories.py
+++ b/python/rcsss/envs/factories.py
@@ -177,6 +177,7 @@ def fr3_sim_env(
     relative_to: RelativeTo = RelativeTo.LAST_STEP,
     urdf_path: str | PathLike | None = None,
     mjcf: str | PathLike = "fr3_empty_world",
+    sim_wrapper: Type[SimWrapper] | None = None,
 ) -> gym.Env[ObsArmsGrCam, LimitedJointsRelDictType]:
     """
     Creates a simulation environment for the FR3 robot.
@@ -194,6 +195,8 @@ def fr3_sim_env(
         urdf_path (str | PathLike | None): Path to the URDF file. If None, the URDF file is automatically deduced
             which requires a UTN compatible lab scene to be present.
         mjcf (str | PathLike): Path to the Mujoco scene XML file. Defaults to "fr3_empty_world".
+        sim_wrapper (Type[SimWrapper] | None): Wrapper to be applied before the simulation wrapper. This is useful
+            for reset management e.g. resetting objects in the scene with correct observations. Defaults to None.
 
     Returns:
         gym.Env: The configured simulation environment for the FR3 robot.
@@ -208,7 +211,7 @@ def fr3_sim_env(
     robot = rcsss.sim.FR3(simulation, "0", ik)
     robot.set_parameters(robot_cfg)
     env: gym.Env = FR3Env(robot, control_mode)
-    env = FR3Sim(env, simulation)
+    env = FR3Sim(env, simulation, sim_wrapper)
 
     if camera_set_cfg is not None:
         camera_set = SimCameraSet(simulation, camera_set_cfg)
diff --git a/python/rcsss/envs/sim.py b/python/rcsss/envs/sim.py
index 581dff25..3dc9ea5a 100644
--- a/python/rcsss/envs/sim.py
+++ b/python/rcsss/envs/sim.py
@@ -1,14 +1,24 @@
 import logging
-from typing import Any, SupportsFloat, cast
+from typing import Any, SupportsFloat, Type, cast
 
 import gymnasium as gym
+import numpy as np
 import rcsss
 from rcsss import sim
 from rcsss.envs.base import ControlMode, FR3Env, GripperWrapper
 
+class SimWrapper(gym.Wrapper):
+    def __init__(self, env: gym.Env, simulation: sim.Sim):
+        super().__init__(env)
+        self.unwrapped: FR3Env
+        assert isinstance(self.unwrapped.robot, sim.FR3), "Robot must be a sim.FR3 instance."
+        self.sim = simulation
+
 
 class FR3Sim(gym.Wrapper):
-    def __init__(self, env, simulation: sim.Sim):
+    def __init__(self, env, simulation: sim.Sim, sim_wrapper: Type[SimWrapper] | None = None):
+        if sim_wrapper is not None:
+            env = sim_wrapper(env, simulation)
         super().__init__(env)
         self.unwrapped: FR3Env
         assert isinstance(self.unwrapped.robot, sim.FR3), "Robot must be a sim.FR3 instance."
@@ -29,6 +39,7 @@ def step(self, action: dict[str, Any]) -> tuple[dict[str, Any], float, bool, boo
     def reset(
         self, seed: int | None = None, options: dict[str, Any] | None = None
     ) -> tuple[dict[str, Any], dict[str, Any]]:
+        self.sim.reset()
         _, info = super().reset(seed=seed, options=options)
         self.sim.step(1)
         obs = cast(dict, self.unwrapped.get_obs())

From 42e7323b604e8283ba339673b39be5ede60bcc5c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Tue, 3 Dec 2024 17:00:15 +0100
Subject: [PATCH 03/11] fix(sim): reset sim

- fixed callback reset
- set render callback to negative inital value to direclty render in the first step
- reset fr3 also sets the controller values to the home pose
---
 src/sim/FR3.cpp | 11 ++++-------
 src/sim/sim.cpp | 27 +++++++++++++++++++++++++--
 src/sim/sim.h   |  1 +
 3 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/src/sim/FR3.cpp b/src/sim/FR3.cpp
index 23524a84..475b06f0 100644
--- a/src/sim/FR3.cpp
+++ b/src/sim/FR3.cpp
@@ -200,13 +200,7 @@ bool FR3::convergence_callback() {
   return this->state.is_arrived and not this->state.is_moving;
 }
 
-void FR3::m_reset() {
-  for (size_t i = 0; i < std::size(this->ids.joints); ++i) {
-    size_t jnt_id = this->ids.joints[i];
-    size_t jnt_qposadr = this->sim->m->jnt_qposadr[jnt_id];
-    this->sim->d->qpos[jnt_qposadr] = q_home[i];
-  }
-}
+void FR3::m_reset() { this->set_joints_hard(q_home); }
 
 void FR3::set_joints_hard(const common::Vector7d& q) {
   for (size_t i = 0; i < std::size(this->ids.joints); ++i) {
@@ -214,6 +208,9 @@ void FR3::set_joints_hard(const common::Vector7d& q) {
     size_t jnt_qposadr = this->sim->m->jnt_qposadr[jnt_id];
     this->sim->d->qpos[jnt_qposadr] = q[i];
   }
+  for (size_t i = 0; i < std::size(this->ids.actuators); ++i) {
+    this->sim->d->ctrl[this->ids.actuators[i]] = q[i];
+  }
 }
 
 common::Pose FR3::get_base_pose_in_world_coordinates() {
diff --git a/src/sim/sim.cpp b/src/sim/sim.cpp
index 8e1d86bf..45ca9702 100644
--- a/src/sim/sim.cpp
+++ b/src/sim/sim.cpp
@@ -121,7 +121,28 @@ void Sim::step(size_t k) {
   }
 }
 
-void Sim::reset() { mj_resetData(this->m, this->d); }
+void Sim::reset() {
+  mj_resetData(this->m, this->d);
+  this->reset_callbacks();
+}
+
+void Sim::reset_callbacks() {
+  for (size_t i = 0; i < std::size(this->callbacks); ++i) {
+    this->callbacks[i].last_call_timestamp = 0;
+  }
+  for (size_t i = 0; i < std::size(this->any_callbacks); ++i) {
+    this->any_callbacks[i].last_call_timestamp = 0;
+  }
+  for (size_t i = 0; i < std::size(this->all_callbacks); ++i) {
+    this->all_callbacks[i].last_call_timestamp = 0;
+  }
+  for (size_t i = 0; i < std::size(this->rendering_callbacks); ++i) {
+    // this is negative so that we will directly render the cameras
+    // in the first step
+    this->rendering_callbacks[i].last_call_timestamp =
+        -this->rendering_callbacks[i].seconds_between_calls;
+  }
+}
 
 void Sim::register_cb(std::function<void(void)> cb,
                       mjtNum seconds_between_calls) {
@@ -151,7 +172,9 @@ void Sim::register_rendering_callback(
       RenderingCallback{.cb = cb,
                         .id = id,
                         .seconds_between_calls = seconds_between_calls,
-                        .last_call_timestamp = 0});
+                        // this is negative so that we will directly render the
+                        // cameras in the first step
+                        .last_call_timestamp = -seconds_between_calls});
 }
 
 void Sim::start_gui_server(const std::string& id) {
diff --git a/src/sim/sim.h b/src/sim/sim.h
index 9916ce94..9a899d30 100644
--- a/src/sim/sim.h
+++ b/src/sim/sim.h
@@ -80,6 +80,7 @@ class Sim {
   bool is_converged();
   void step_until_convergence();
   void step(size_t k);
+  void reset_callbacks();
   void reset();
   /* NOTE: IMPORTANT, the callback is not necessarily called at exactly the
    * the requested interval. We invoke a callback if the elapsed simulation time

From 5e4313e530ed451597e0f267eb960e8be2cd628b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Tue, 3 Dec 2024 17:03:47 +0100
Subject: [PATCH 04/11] feat(env): random cube pos wrapper

- added random cube pos wrapper which randomizes cube position in fr3 simple pick up sim env
- set truncate on collision to fales
- update model repo git version
---
 CMakeLists.txt                 |  2 +-
 python/rcsss/envs/factories.py |  9 ++++++---
 python/rcsss/envs/sim.py       | 19 +++++++++++++++++++
 3 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index 4b6a9371..22f4e76c 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -96,7 +96,7 @@ FetchContent_MakeAvailable(libfranka rl pybind11)
 if (${INCLUDE_UTN_MODELS})
     if (DEFINED GITLAB_MODELS_TOKEN)
         include(download_scenes)
-        set(ref 77e8782a3488cbf01361d2322f02e75c0d820644)
+        set(ref b8234983a5e35e222c955f9145ac4cd129827a8e)
         FetchContent_Declare(
             scenes
             URL         "https://gitos.rrze.fau.de/api/v4/projects/1100/repository/archive?path=scenes&sha=${ref}"
diff --git a/python/rcsss/envs/factories.py b/python/rcsss/envs/factories.py
index 090a74a6..03af66cc 100644
--- a/python/rcsss/envs/factories.py
+++ b/python/rcsss/envs/factories.py
@@ -1,10 +1,11 @@
 import logging
 from os import PathLike
+from typing import Type
 
 import gymnasium as gym
-from gymnasium.envs import EnvCreator
 import numpy as np
 import rcsss
+from gymnasium.envs.registration import EnvCreator
 from rcsss import sim
 from rcsss._core.hw import FR3Config, IKController
 from rcsss._core.sim import CameraType
@@ -23,7 +24,7 @@
     RelativeTo,
 )
 from rcsss.envs.hw import FR3HW
-from rcsss.envs.sim import CollisionGuard, FR3Sim
+from rcsss.envs.sim import CollisionGuard, FR3Sim, RandomCubePos, SimWrapper
 
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)
@@ -136,6 +137,7 @@ def fr3_hw_env(
             control_mode=control_mode,
             tcp_offset=rcsss.common.Pose(rcsss.common.FrankaHandTCPOffset()),
             sim_gui=True,
+            truncate_on_collision=False,
         )
     if max_relative_movement is not None:
         env = RelativeActionSpace(env, max_mov=max_relative_movement, relative_to=relative_to)
@@ -231,6 +233,7 @@ def fr3_sim_env(
             control_mode=control_mode,
             tcp_offset=rcsss.common.Pose(rcsss.common.FrankaHandTCPOffset()),
             sim_gui=True,
+            truncate_on_collision=False,
         )
     if max_relative_movement is not None:
         env = RelativeActionSpace(env, max_mov=max_relative_movement, relative_to=relative_to)
@@ -307,12 +310,12 @@ def __call__(
             max_relative_movement=(0.2, np.deg2rad(45)) if delta_actions else None,
             relative_to=RelativeTo.LAST_STEP,
             mjcf="fr3_simple_pick_up",
+            sim_wrapper=RandomCubePos,
         )
         if render_mode == "human":
             env_rel.get_wrapper_attr("sim").open_gui()
 
         # TODO:
-        # - randomize location of the yellow cube
         # - wrapper for success/failure and reward
 
         return env_rel
diff --git a/python/rcsss/envs/sim.py b/python/rcsss/envs/sim.py
index 3dc9ea5a..f8475127 100644
--- a/python/rcsss/envs/sim.py
+++ b/python/rcsss/envs/sim.py
@@ -7,6 +7,7 @@
 from rcsss import sim
 from rcsss.envs.base import ControlMode, FR3Env, GripperWrapper
 
+
 class SimWrapper(gym.Wrapper):
     def __init__(self, env: gym.Env, simulation: sim.Sim):
         super().__init__(env)
@@ -168,3 +169,21 @@ def env_from_xml_paths(
             sim_gui=sim_gui,
             truncate_on_collision=truncate_on_collision,
         )
+
+
+class RandomCubePos(SimWrapper):
+    """Wrapper to randomly place cube in the FR3SimplePickUpSim environment."""
+
+    def reset(
+        self, seed: int | None = None, options: dict[str, Any] | None = None
+    ) -> tuple[dict[str, Any], dict[str, Any]]:
+        obs, info = super().reset(seed=seed, options=options)
+
+        iso_cube = [0.498, 0.0, 0.226]
+        pos_x = iso_cube[0] + np.random.random() * 0.2 - 0.1
+        pos_y = iso_cube[1] + np.random.random() * 0.2 - 0.1
+        pos_z = 0.03
+
+        self.sim.data.joint("yellow-box-joint").qpos[:3] = [pos_x, pos_y, pos_z]
+
+        return obs, info

From 8e6fc16f2c90397225eac9922666113b33a5196a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Tue, 3 Dec 2024 17:04:32 +0100
Subject: [PATCH 05/11] bump(mujoco): 3.1.5 -> 3.2.6

---
 pyproject.toml       | 2 +-
 requirements_dev.txt | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 23740cad..2028c783 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -23,7 +23,7 @@ dependencies = ["websockets>=11.0",
     "python-dotenv==1.0.1",
     "opencv-python~=4.10.0.84",
     # NOTE: when changing the mujoco version, also change it in requirements_dev.txt
-    "mujoco==3.1.5"
+    "mujoco==3.2.6"
 ]
 readme = "README.md"
 maintainers = [
diff --git a/requirements_dev.txt b/requirements_dev.txt
index 4224d819..6b51c543 100644
--- a/requirements_dev.txt
+++ b/requirements_dev.txt
@@ -9,5 +9,5 @@ pytest==8.1.1
 commitizen~=3.28.0
 scikit-build-core>=0.3.3
 pybind11
-mujoco==3.1.5
+mujoco==3.2.6
 wheel

From fb4861b2916f2b1f36c1840f70aafab46ee9f1f3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Tue, 3 Dec 2024 20:50:32 +0100
Subject: [PATCH 06/11] feat(env): reward and success for simple pick up env

---
 pyproject.toml                 |  1 +
 python/rcsss/envs/factories.py | 16 ++++++----------
 python/rcsss/envs/sim.py       | 29 ++++++++++++++++++++++++++++-
 3 files changed, 35 insertions(+), 11 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 2028c783..c26b9871 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -98,6 +98,7 @@ ignore = [
   "T201",    # print() used
   "PLR5501", # elif to reduce indentation
   "PT018",   # assertion should be broken down into multiple parts
+  "NPY002",
 ]
 
 [tool.pylint.format]
diff --git a/python/rcsss/envs/factories.py b/python/rcsss/envs/factories.py
index 03af66cc..d7b9e886 100644
--- a/python/rcsss/envs/factories.py
+++ b/python/rcsss/envs/factories.py
@@ -24,7 +24,7 @@
     RelativeTo,
 )
 from rcsss.envs.hw import FR3HW
-from rcsss.envs.sim import CollisionGuard, FR3Sim, RandomCubePos, SimWrapper
+from rcsss.envs.sim import CollisionGuard, FR3Sim, FR3SimplePickUpSimSuccessWrapper, RandomCubePos, SimWrapper
 
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)
@@ -221,7 +221,7 @@ def fr3_sim_env(
 
     if gripper_cfg is not None:
         gripper = sim.FrankaHand(simulation, "0", gripper_cfg)
-        env = GripperWrapper(env, gripper, binary=False)
+        env = GripperWrapper(env, gripper, binary=True)
 
     if collision_guard:
         env = CollisionGuard.env_from_xml_paths(
@@ -242,7 +242,7 @@ def fr3_sim_env(
 
 
 class FR3Real(EnvCreator):
-    def __call__(
+    def __call__(  # type: ignore
         self,
         robot_ip: str,
         control_mode: str = "xyzrpy",
@@ -251,7 +251,7 @@ def __call__(
         gripper: bool = True,
     ) -> gym.Env:
         camera_set = default_realsense(camera_config)
-        env_rel = fr3_hw_env(
+        return fr3_hw_env(
             ip=robot_ip,
             camera_set=camera_set,
             control_mode=(
@@ -266,11 +266,9 @@ def __call__(
             relative_to=RelativeTo.LAST_STEP,
         )
 
-        return env_rel
-
 
 class FR3SimplePickUpSim(EnvCreator):
-    def __call__(
+    def __call__(  # type: ignore
         self,
         render_mode: str = "human",
         control_mode: str = "xyzrpy",
@@ -312,10 +310,8 @@ def __call__(
             mjcf="fr3_simple_pick_up",
             sim_wrapper=RandomCubePos,
         )
+        env_rel = FR3SimplePickUpSimSuccessWrapper(env_rel)
         if render_mode == "human":
             env_rel.get_wrapper_attr("sim").open_gui()
 
-        # TODO:
-        # - wrapper for success/failure and reward
-
         return env_rel
diff --git a/python/rcsss/envs/sim.py b/python/rcsss/envs/sim.py
index f8475127..08a9e6a4 100644
--- a/python/rcsss/envs/sim.py
+++ b/python/rcsss/envs/sim.py
@@ -183,7 +183,34 @@ def reset(
         pos_x = iso_cube[0] + np.random.random() * 0.2 - 0.1
         pos_y = iso_cube[1] + np.random.random() * 0.2 - 0.1
         pos_z = 0.03
-
         self.sim.data.joint("yellow-box-joint").qpos[:3] = [pos_x, pos_y, pos_z]
 
         return obs, info
+
+
+class FR3SimplePickUpSimSuccessWrapper(gym.Wrapper):
+    """Wrapper to check if the cube is successfully picked up in the FR3SimplePickUpSim environment."""
+
+    EE_HOME = np.array([0.34169773, 0.00047028, 0.4309004])
+
+    def __init__(self, env):
+        super().__init__(env)
+        self.unwrapped: FR3Env
+        assert isinstance(self.unwrapped.robot, sim.FR3), "Robot must be a sim.FR3 instance."
+        self.sim = env.get_wrapper_attr("sim")
+
+    def step(self, action: dict[str, Any]):
+        obs, reward, done, truncated, info = super().step(action)
+
+        success = (
+            self.sim.data.joint("yellow-box-joint").qpos[2] > 0.3
+            and obs["gripper"] == GripperWrapper.BINARY_GRIPPER_CLOSED
+        )
+        diff_ee_cube = np.linalg.norm(
+            self.sim.data.joint("yellow-box-joint").qpos[:3]
+            - self.unwrapped.robot.get_cartesian_position().translation()
+        )
+        diff_cube_home = np.linalg.norm(self.sim.data.joint("yellow-box-joint").qpos[:3] - self.EE_HOME)
+        reward = -diff_cube_home - diff_ee_cube
+
+        return obs, reward, success, truncated, info

From 8928582a789bcf024d2f3573306e722247de24c1 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Tue, 3 Dec 2024 20:53:20 +0100
Subject: [PATCH 07/11] style: fix pyformat

---
 python/rcsss/envs/factories.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/python/rcsss/envs/factories.py b/python/rcsss/envs/factories.py
index d7b9e886..1bb45def 100644
--- a/python/rcsss/envs/factories.py
+++ b/python/rcsss/envs/factories.py
@@ -24,7 +24,13 @@
     RelativeTo,
 )
 from rcsss.envs.hw import FR3HW
-from rcsss.envs.sim import CollisionGuard, FR3Sim, FR3SimplePickUpSimSuccessWrapper, RandomCubePos, SimWrapper
+from rcsss.envs.sim import (
+    CollisionGuard,
+    FR3Sim,
+    FR3SimplePickUpSimSuccessWrapper,
+    RandomCubePos,
+    SimWrapper,
+)
 
 logger = logging.getLogger(__name__)
 logger.setLevel(logging.INFO)

From 2bc8acf6670f5a803fbaed22de2b3fad2c7c4188 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Tue, 3 Dec 2024 21:06:32 +0100
Subject: [PATCH 08/11] fix(env): binary gripper observation

- binary observation
- variables for open and closed state
---
 python/rcsss/envs/base.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/python/rcsss/envs/base.py b/python/rcsss/envs/base.py
index 2bbc3c6e..1724998b 100644
--- a/python/rcsss/envs/base.py
+++ b/python/rcsss/envs/base.py
@@ -523,6 +523,10 @@ def close(self):
 
 class GripperWrapper(ActObsInfoWrapper):
     # TODO: sticky gripper, like in aloha
+
+    BINARY_GRIPPER_CLOSED = 0
+    BINARY_GRIPPER_OPEN = 1
+
     def __init__(self, env, gripper: common.Gripper, binary: bool = True):
         super().__init__(env)
         self.unwrapped: FR3Env
@@ -543,7 +547,9 @@ def reset(self, **kwargs) -> tuple[dict[str, Any], dict[str, Any]]:
     def observation(self, observation: dict[str, Any], info: dict[str, Any]) -> tuple[dict[str, Any], dict[str, Any]]:
         observation = copy.deepcopy(observation)
         if self.binary:
-            observation[self.gripper_key] = self._last_gripper_cmd if self._last_gripper_cmd is not None else 1
+            observation[self.gripper_key] = (
+                self._last_gripper_cmd if self._last_gripper_cmd is not None else self.BINARY_GRIPPER_OPEN
+            )
         else:
             observation[self.gripper_key] = self._gripper.get_normalized_width()
 
@@ -565,7 +571,7 @@ def action(self, action: dict[str, Any]) -> dict[str, Any]:
 
         if self._last_gripper_cmd is None or self._last_gripper_cmd != gripper_action:
             if self.binary:
-                self._gripper.grasp() if gripper_action == 0 else self._gripper.open()
+                self._gripper.grasp() if gripper_action == self.BINARY_GRIPPER_CLOSED else self._gripper.open()
             else:
                 self._gripper.set_normalized_width(gripper_action)
         self._last_gripper_cmd = gripper_action

From 1d3f458fa22ae47ff430c97169fb4ec59c6c680c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Wed, 4 Dec 2024 14:11:52 +0100
Subject: [PATCH 09/11] test(sim env): remove truncation assert in collision
 guard

---
 python/tests/test_sim_envs.py | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/python/tests/test_sim_envs.py b/python/tests/test_sim_envs.py
index 02d22ee6..3c841cdf 100644
--- a/python/tests/test_sim_envs.py
+++ b/python/tests/test_sim_envs.py
@@ -152,9 +152,8 @@ def test_collision_guard_trpy(self, cfg, gripper_cfg, cam_cfg):
         obs["xyzrpy"][2] = -0.2
         collision_action = TRPYDictType(xyzrpy=obs["xyzrpy"])
         collision_action.update(GripperDictType(gripper=0))
-        _, _, _, truncated, info = env.step(collision_action)
+        _, _, _, info = env.step(collision_action)
         p2 = env.unwrapped.robot.get_joint_position()
-        assert truncated
         assert info["collision"]
         # assure that the robot did not move
         assert np.allclose(p1, p2)
@@ -254,9 +253,8 @@ def test_collision_guard_tquart(self, cfg, gripper_cfg, cam_cfg):
         obs["tquart"][2] = -0.2
         collision_action = TQuartDictType(tquart=obs["tquart"])
         collision_action.update(GripperDictType(gripper=0))
-        _, _, _, truncated, info = env.step(collision_action)
+        _, _, _, _, info = env.step(collision_action)
         p2 = env.unwrapped.robot.get_joint_position()
-        assert truncated
         assert info["collision"]
         # assure that the robot did not move
         assert np.allclose(p1, p2)
@@ -327,10 +325,9 @@ def test_collision_guard_joints(self, cfg, gripper_cfg, cam_cfg):
         # the below action is a test_case where there is an obvious collision regardless of the gripper action
         collision_act = JointsDictType(joints=np.array([0, 1.78, 0, -1.45, 0, 0, 0], dtype=np.float32))
         collision_act.update(GripperDictType(gripper=1))
-        _, _, _, truncated, info = env.step(collision_act)
+        _, _, _, _, info = env.step(collision_act)
         p2 = env.unwrapped.robot.get_joint_position()
 
-        assert truncated
         assert info["collision"]
         # assure that the robot did not move
         assert np.allclose(p1, p2)

From ea390cbe374fcfdff63c4a8d899ff0fcc15368e8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Wed, 4 Dec 2024 14:16:17 +0100
Subject: [PATCH 10/11] example: added grasp demo

---
 python/examples/fr3.py        |  9 +---
 python/examples/grasp_demo.py | 93 +++++++++++++++++++++++++++++++++++
 2 files changed, 95 insertions(+), 7 deletions(-)
 create mode 100644 python/examples/grasp_demo.py

diff --git a/python/examples/fr3.py b/python/examples/fr3.py
index 254ee83c..f87135ec 100644
--- a/python/examples/fr3.py
+++ b/python/examples/fr3.py
@@ -79,13 +79,8 @@ def main():
 
             # add camera to have a rendering gui
             cameras = {
-                "default_free": SimCameraConfig(
-                    identifier="", type=int(CameraType.default_free)
-                ),
-                "wrist": SimCameraConfig(
-                    identifier="eye-in-hand_0", type=int(CameraType.fixed)
-                ),
-                # TODO: odd behavior when not both cameras are used: only last image is shown
+                "default_free": SimCameraConfig(identifier="", type=int(CameraType.default_free)),
+                "wrist": SimCameraConfig(identifier="eye-in-hand_0", type=int(CameraType.fixed)),
             }
             cam_cfg = SimCameraSetConfig(cameras=cameras, resolution_width=1280, resolution_height=720, frame_rate=20)
             camera_set = SimCameraSet(simulation, cam_cfg)
diff --git a/python/examples/grasp_demo.py b/python/examples/grasp_demo.py
new file mode 100644
index 00000000..795ad7eb
--- /dev/null
+++ b/python/examples/grasp_demo.py
@@ -0,0 +1,93 @@
+import logging
+
+import gymnasium as gym
+import mujoco
+import numpy as np
+from rcsss._core.common import Pose
+from rcsss._core.sim import FR3State
+from rcsss.envs.base import GripperWrapper
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+
+
+class PickUpDemo:
+    def __init__(self, env: gym.Env):
+        self.env = env
+        self.home_pose = self.env.unwrapped.robot.get_cartesian_position()
+
+    def _action(self, pose: Pose, gripper: float) -> np.ndarray:
+        return {"xyzrpy": pose.xyzrpy(), "gripper": gripper}
+
+    def get_object_pose(self, geom_name) -> Pose:
+        model = self.env.get_wrapper_attr("sim").model
+        data = self.env.get_wrapper_attr("sim").data
+
+        geom_id = mujoco.mj_name2id(model, mujoco.mjtObj.mjOBJ_GEOM, geom_name)
+        return Pose(translation=data.geom_xpos[geom_id], rotation=data.geom_xmat[geom_id].reshape(3, 3))
+
+    def generate_waypoints(self, start_pose: Pose, end_pose: Pose, num_waypoints: int) -> list[Pose]:
+        waypoints = []
+        for i in range(num_waypoints + 1):
+            t = i / (num_waypoints + 1)
+            waypoints.append(start_pose.interpolate(end_pose, t))
+        return waypoints
+
+    def step(self, action: np.ndarray) -> dict:
+        re = self.env.step(action)
+        s: FR3State = self.env.unwrapped.robot.get_state()
+        return re[0]
+
+    def plan_linear_motion(self, geom_name: str, delta_up: float, num_waypoints: int = 20) -> list[Pose]:
+        end_eff_pose = self.env.unwrapped.robot.get_cartesian_position()
+
+        goal_pose = self.get_object_pose(geom_name=geom_name)
+        # goal pose is above the object and gripper coordinate must flip z-axis (end effector base rotation is [1, 0, 0, 0])
+        # be careful we define identity quaternion as as [0, 0, 0, 1]
+        # this does not work if the object is flipped
+        goal_pose *= Pose(translation=[0, 0, delta_up], quaternion=[1, 0, 0, 0])
+
+        waypoints = self.generate_waypoints(end_eff_pose, goal_pose, num_waypoints=num_waypoints)
+        return waypoints
+
+    def execute_motion(self, waypoints: list[Pose], gripper: float = GripperWrapper.BINARY_GRIPPER_OPEN) -> dict:
+        for i in range(1, len(waypoints)):
+            # calculate delta action
+            delta_action = waypoints[i] * waypoints[i - 1].inverse()
+            obs = self.step(self._action(delta_action, gripper))
+        return obs
+
+    def approach(self, geom_name: str):
+        waypoints = self.plan_linear_motion(geom_name=geom_name, delta_up=0.2, num_waypoints=5)
+        self.execute_motion(waypoints=waypoints, gripper=GripperWrapper.BINARY_GRIPPER_OPEN)
+
+    def grasp(self, geom_name: str):
+
+        waypoints = self.plan_linear_motion(geom_name=geom_name, delta_up=0, num_waypoints=15)
+        self.execute_motion(waypoints=waypoints, gripper=GripperWrapper.BINARY_GRIPPER_OPEN)
+
+        self.step(self._action(Pose(), GripperWrapper.BINARY_GRIPPER_CLOSED))
+
+        waypoints = self.plan_linear_motion(geom_name=geom_name, delta_up=0.2, num_waypoints=20)
+        self.execute_motion(waypoints=waypoints, gripper=GripperWrapper.BINARY_GRIPPER_CLOSED)
+
+    def move_home(self):
+        end_eff_pose = self.env.unwrapped.robot.get_cartesian_position()
+        waypoints = self.generate_waypoints(end_eff_pose, self.home_pose, num_waypoints=10)
+        self.execute_motion(waypoints=waypoints, gripper=GripperWrapper.BINARY_GRIPPER_CLOSED)
+
+    def pickup(self, geom_name: str):
+        self.approach(geom_name)
+        self.grasp(geom_name)
+        self.move_home()
+
+
+def main():
+    env = gym.make("rcs/SimplePickUpSim-v0", render_mode="human", delta_actions=True)
+    env.reset()
+    controller = PickUpDemo(env)
+    controller.pickup("yellow_box_geom")
+
+
+if __name__ == "__main__":
+    main()

From d8dfe4fb801b9f65b738c85d8b9d126eb5d44cae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tobias=20J=C3=BClg?= <tobias.juelg@utn.de>
Date: Wed, 4 Dec 2024 22:58:34 +0100
Subject: [PATCH 11/11] tests: fix tests by removing controller reset

---
 python/rcsss/envs/factories.py |  2 +-
 python/tests/test_sim_envs.py  | 10 +++++-----
 src/sim/FR3.cpp                |  3 ---
 3 files changed, 6 insertions(+), 9 deletions(-)

diff --git a/python/rcsss/envs/factories.py b/python/rcsss/envs/factories.py
index 1bb45def..09617d1e 100644
--- a/python/rcsss/envs/factories.py
+++ b/python/rcsss/envs/factories.py
@@ -239,7 +239,7 @@ def fr3_sim_env(
             control_mode=control_mode,
             tcp_offset=rcsss.common.Pose(rcsss.common.FrankaHandTCPOffset()),
             sim_gui=True,
-            truncate_on_collision=False,
+            truncate_on_collision=True,
         )
     if max_relative_movement is not None:
         env = RelativeActionSpace(env, max_mov=max_relative_movement, relative_to=relative_to)
diff --git a/python/tests/test_sim_envs.py b/python/tests/test_sim_envs.py
index 3c841cdf..36980124 100644
--- a/python/tests/test_sim_envs.py
+++ b/python/tests/test_sim_envs.py
@@ -152,9 +152,9 @@ def test_collision_guard_trpy(self, cfg, gripper_cfg, cam_cfg):
         obs["xyzrpy"][2] = -0.2
         collision_action = TRPYDictType(xyzrpy=obs["xyzrpy"])
         collision_action.update(GripperDictType(gripper=0))
-        _, _, _, info = env.step(collision_action)
+        _, _, _, _, info = env.step(collision_action)
         p2 = env.unwrapped.robot.get_joint_position()
-        assert info["collision"]
+        self.assert_collision(info)
         # assure that the robot did not move
         assert np.allclose(p1, p2)
 
@@ -180,7 +180,7 @@ def test_non_zero_action_tquart(self, cfg):
         non_zero_action = TQuartDictType(tquart=np.concatenate([t, q], axis=0))
         expected_obs = obs_initial.copy()
         expected_obs["tquart"][0] += x_pos_change
-        obs, _, _, _, info = env.step(non_zero_action)
+        _, _, _, _, info = env.step(non_zero_action)
         self.assert_no_pose_change(info, obs_initial, expected_obs)
 
     def test_zero_action_tquart(self, cfg):
@@ -255,7 +255,7 @@ def test_collision_guard_tquart(self, cfg, gripper_cfg, cam_cfg):
         collision_action.update(GripperDictType(gripper=0))
         _, _, _, _, info = env.step(collision_action)
         p2 = env.unwrapped.robot.get_joint_position()
-        assert info["collision"]
+        self.assert_collision(info)
         # assure that the robot did not move
         assert np.allclose(p1, p2)
 
@@ -328,7 +328,7 @@ def test_collision_guard_joints(self, cfg, gripper_cfg, cam_cfg):
         _, _, _, _, info = env.step(collision_act)
         p2 = env.unwrapped.robot.get_joint_position()
 
-        assert info["collision"]
+        self.assert_collision(info)
         # assure that the robot did not move
         assert np.allclose(p1, p2)
 
diff --git a/src/sim/FR3.cpp b/src/sim/FR3.cpp
index 475b06f0..14081036 100644
--- a/src/sim/FR3.cpp
+++ b/src/sim/FR3.cpp
@@ -208,9 +208,6 @@ void FR3::set_joints_hard(const common::Vector7d& q) {
     size_t jnt_qposadr = this->sim->m->jnt_qposadr[jnt_id];
     this->sim->d->qpos[jnt_qposadr] = q[i];
   }
-  for (size_t i = 0; i < std::size(this->ids.actuators); ++i) {
-    this->sim->d->ctrl[this->ids.actuators[i]] = q[i];
-  }
 }
 
 common::Pose FR3::get_base_pose_in_world_coordinates() {