From 92b6abaa22ecc5c4857d85ce04ab43f2873c47a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20J=C3=BClg?= Date: Mon, 7 Jul 2025 12:59:59 +0200 Subject: [PATCH 01/13] fix(sim camera): remove framerate for direct camera render flag --- src/sim/camera.cpp | 9 ++++++++- src/sim/camera.h | 2 ++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/src/sim/camera.cpp b/src/sim/camera.cpp index 7323d6c4..e8fb659e 100644 --- a/src/sim/camera.cpp +++ b/src/sim/camera.cpp @@ -86,13 +86,20 @@ std::optional SimCameraSet::get_timestamp_frameset(float ts) { void SimCameraSet::render_all() { for (auto const& [id, cam] : this->cameras_cfg) { mjrContext* ctx = this->sim->renderer.get_context(id); - this->frame_callback(id, *ctx, this->sim->renderer.scene, + this->render_single(id, *ctx, this->sim->renderer.scene, this->sim->renderer.opt); } } void SimCameraSet::frame_callback(const std::string& id, mjrContext& ctx, mjvScene& scene, mjvOption& opt) { + if (!this->render_on_demand){ + this->render_single(id, ctx, scene, opt); + } +} + +void SimCameraSet::render_single(const std::string& id, mjrContext& ctx, + mjvScene& scene, mjvOption& opt) { mjrRect viewport = mjr_maxViewport(&ctx); int W = viewport.width; int H = viewport.height; diff --git a/src/sim/camera.h b/src/sim/camera.h index 160cd01e..095991e1 100644 --- a/src/sim/camera.h +++ b/src/sim/camera.h @@ -70,6 +70,8 @@ class SimCameraSet { std::mutex buffer_lock; mjtNum last_ts = 0; void render_all(); + void render_single(const std::string& id, mjrContext& ctx, mjvScene& scene, + mjvOption& opt); }; } // namespace sim } // namespace rcs From 5a8b5878d77451c3b976921c860f79d582a0276d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20J=C3=BClg?= Date: Tue, 8 Jul 2025 16:45:45 +0200 Subject: [PATCH 02/13] feat(sim): intrinsic and extrinsic as additional camera data - added extrinsic and intrinsic matrices to camera interface - added extrinsic and intrinsic to mujoco camera set --- python/rcs/camera/interface.py | 5 ++++- python/rcs/camera/sim.py | 39 ++++++++++++++++++++++++++++++++-- 2 files changed, 41 insertions(+), 3 deletions(-) diff --git a/python/rcs/camera/interface.py b/python/rcs/camera/interface.py index 53f15ab3..fd1c53f7 100644 --- a/python/rcs/camera/interface.py +++ b/python/rcs/camera/interface.py @@ -1,8 +1,9 @@ import logging from dataclasses import dataclass from datetime import datetime -from typing import Any, Protocol +from typing import Any, Literal, Protocol +import numpy as np from rcs._core.common import BaseCameraConfig logger = logging.getLogger(__name__) @@ -14,6 +15,8 @@ class DataFrame: data: Any # timestamp in posix time timestamp: float | None = None + intrinsics: np.ndarray[tuple[Literal[3, 4]], np.dtype[np.float64]] | None = None + extrinsics: np.ndarray[tuple[Literal[4, 4]], np.dtype[np.float64]] | None = None @dataclass(kw_only=True) diff --git a/python/rcs/camera/sim.py b/python/rcs/camera/sim.py index 2400bf5c..909ff4f3 100644 --- a/python/rcs/camera/sim.py +++ b/python/rcs/camera/sim.py @@ -1,9 +1,12 @@ import logging from datetime import datetime +from typing import Literal +import mujoco import numpy as np # from rcs._core.common import BaseCameraConfig +from rcs._core import common from rcs._core.sim import FrameSet as _FrameSet from rcs._core.sim import SimCameraConfig from rcs._core.sim import SimCameraSet as _SimCameraSet @@ -68,14 +71,46 @@ def _cpp_to_python_frames(self, cpp_frameset: _FrameSet | None) -> FrameSet | No near = self._sim.model.vis.map.znear * extent far = self._sim.model.vis.map.zfar * extent depth_np_frame = near / (1 - depth_np_frame * (1 - near / far)) + + intrinsics = self._intrinsics(color_name) + extrinsics = self._extrinsics(color_name) + cameraframe = CameraFrame( - color=DataFrame(data=color_np_frame, timestamp=cpp_frameset.timestamp), - depth=DataFrame(data=depth_np_frame, timestamp=cpp_frameset.timestamp), + color=DataFrame( + data=color_np_frame, timestamp=cpp_frameset.timestamp, intrinsics=intrinsics, extrinsics=extrinsics + ), + depth=DataFrame( + data=depth_np_frame, timestamp=cpp_frameset.timestamp, intrinsics=intrinsics, extrinsics=extrinsics + ), ) frame = Frame(camera=cameraframe, avg_timestamp=cpp_frameset.timestamp) frames[color_name] = frame return FrameSet(frames=frames, avg_timestamp=cpp_frameset.timestamp) + def _intrinsics(self, camera_name) -> np.ndarray[tuple[Literal[3, 4]], np.dtype[np.float64]]: + cam_id = mujoco.mj_name2id(self._sim.model, mujoco.mjtObj.mjOBJ_CAMERA, self.cameras[camera_name].identifier) + fovy = self._sim.model.cam_fovy[cam_id] + fx = fy = 0.5 * self.cameras[camera_name].resolution_height / np.tan(fovy * np.pi / 360) + return np.array( + [ + [fx, 0, (self.cameras[camera_name].resolution_width - 1) / 2, 0], + [0, fy, (self.cameras[camera_name].resolution_height - 1) / 2, 0], + [0, 0, 1, 0], + ] + ) + + def _extrinsics(self, camera_name) -> np.ndarray[tuple[Literal[4, 4]], np.dtype[np.float64]]: + cam_id = mujoco.mj_name2id(self._sim.model, mujoco.mjtObj.mjOBJ_CAMERA, self.cameras[camera_name].identifier) + xpos = self._sim.data.cam_xpos[cam_id] + xmat = self._sim.data.cam_xmat[cam_id].reshape(3, 3) + + cam = common.Pose(rotation=xmat, translation=xpos) + # put z axis infront + rotation_p = common.Pose(rpy_vector=[np.pi, 0, 0], translation=[0, 0, 0]) + cam = cam * rotation_p + + return cam.inverse().pose_matrix() + def config(self, camera_name: str) -> SimCameraConfig: """Should return the configuration of the camera with the given name.""" return self.cameras[camera_name] From aad0ad54ae68bd93387a107be8c942189df81553 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20J=C3=BClg?= Date: Tue, 8 Jul 2025 17:28:32 +0200 Subject: [PATCH 03/13] refactor: move xtrinsics into calibration --- .../rcs_realsense/src/rcs_realsense/camera.py | 11 +++++++++++ python/rcs/camera/hw.py | 14 +++++++++++++- python/rcs/camera/interface.py | 11 +++++++++++ python/rcs/camera/sim.py | 15 +++++++++------ 4 files changed, 44 insertions(+), 7 deletions(-) diff --git a/extensions/rcs_realsense/src/rcs_realsense/camera.py b/extensions/rcs_realsense/src/rcs_realsense/camera.py index 4cd55c85..30f3fcf0 100644 --- a/extensions/rcs_realsense/src/rcs_realsense/camera.py +++ b/extensions/rcs_realsense/src/rcs_realsense/camera.py @@ -1,5 +1,7 @@ import logging from dataclasses import dataclass +import threading +import typing import numpy as np import pyrealsense2 as rs @@ -39,6 +41,8 @@ def __init__( self.enable_imu = enable_imu self.cameras = cameras self._logger = logging.getLogger(__name__) + self._last_frameset: dict[rs.stream, rs.frame] | None = None + self._frameset_lock = threading.Lock() assert ( len({camera.resolution_width for camera in self.cameras.values()}) == 1 and len({camera.resolution_height for camera in self.cameras.values()}) == 1 @@ -209,6 +213,8 @@ def poll_frame(self, camera_name: str) -> Frame: streams = device.pipeline_profile.get_streams() frameset = device.pipeline.wait_for_frames() # frameset = device.pipeline.poll_for_frames() + with self._frameset_lock: + self._last_frameset = frameset color: DataFrame | None = None ir: DataFrame | None = None @@ -346,3 +352,8 @@ def enable_emitter(self, enable_ir_emitter=True): sensor.set_option(rs.option.emitter_enabled, 1 if enable_ir_emitter else 0) if enable_ir_emitter: sensor.set_option(rs.option.laser_power, self.laser_power) + + + def intrinsics(self, camera_name: str) -> np.ndarray[tuple[typing.Literal[3, 4]], np.dtype[np.float64]]: + with self._frameset_lock: + pass diff --git a/python/rcs/camera/hw.py b/python/rcs/camera/hw.py index c0588b4e..01fe875c 100644 --- a/python/rcs/camera/hw.py +++ b/python/rcs/camera/hw.py @@ -9,7 +9,7 @@ import cv2 import numpy as np from rcs._core.common import BaseCameraConfig -from rcs.camera.interface import BaseCameraSet, Frame, FrameSet +from rcs.camera.interface import BaseCameraSet, Calibration, Frame, FrameSet from rcs.utils import SimpleFrameRate @@ -35,6 +35,9 @@ def poll_frame(self, camera_name: str) -> Frame: def camera_names(self) -> list[str]: """Returns the names of the cameras in this set.""" + def intrinsics(self, camera_name: str) -> np.ndarray[tuple[typing.Literal[3, 4]], np.dtype[np.float64]] + """Returns camera intrinsics""" + class HardwareCameraSet(BaseCameraSet): """This base class polls in a separate thread for all cameras and stores them in a buffer. @@ -178,6 +181,15 @@ def warm_up(self): self.poll_frame(camera_name) self.rate_limiter() + def get_calibration(self, camera_name) -> Calibration: + # get intrinsics from the camera and check that extrinsics had been calibrated + pass + + def calibrate(self, camera_name) -> bool: + # calibrate extrinsics with aruca marker + # lets provide a calibration function + pass + def polling_thread(self, warm_up: bool = True): for camera in self.cameras: camera.open() diff --git a/python/rcs/camera/interface.py b/python/rcs/camera/interface.py index fd1c53f7..bc501239 100644 --- a/python/rcs/camera/interface.py +++ b/python/rcs/camera/interface.py @@ -9,6 +9,11 @@ logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) +@dataclass(kw_only=True) +class Calibration: + intrinsics: np.ndarray[tuple[Literal[3, 4]], np.dtype[np.float64]] | None = None + extrinsics: np.ndarray[tuple[Literal[4, 4]], np.dtype[np.float64]] | None = None + @dataclass(kw_only=True) class DataFrame: @@ -68,6 +73,12 @@ def close(self): def config(self, camera_name: str) -> BaseCameraConfig: """Returns the configuration object of the cameras.""" + def get_calibration(self, camera_name) -> Calibration: + """Returns camera calibration""" + + def calibrate(self, camera_name) -> bool: + """Calibrates the camera""" + @property def camera_names(self) -> list[str]: """Returns a list of the activated human readable names of the cameras.""" diff --git a/python/rcs/camera/sim.py b/python/rcs/camera/sim.py index 909ff4f3..0029a15a 100644 --- a/python/rcs/camera/sim.py +++ b/python/rcs/camera/sim.py @@ -10,7 +10,7 @@ from rcs._core.sim import FrameSet as _FrameSet from rcs._core.sim import SimCameraConfig from rcs._core.sim import SimCameraSet as _SimCameraSet -from rcs.camera.interface import CameraFrame, DataFrame, Frame, FrameSet +from rcs.camera.interface import Calibration, CameraFrame, DataFrame, Frame, FrameSet from rcs import sim @@ -72,15 +72,12 @@ def _cpp_to_python_frames(self, cpp_frameset: _FrameSet | None) -> FrameSet | No far = self._sim.model.vis.map.zfar * extent depth_np_frame = near / (1 - depth_np_frame * (1 - near / far)) - intrinsics = self._intrinsics(color_name) - extrinsics = self._extrinsics(color_name) - cameraframe = CameraFrame( color=DataFrame( - data=color_np_frame, timestamp=cpp_frameset.timestamp, intrinsics=intrinsics, extrinsics=extrinsics + data=color_np_frame, timestamp=cpp_frameset.timestamp ), depth=DataFrame( - data=depth_np_frame, timestamp=cpp_frameset.timestamp, intrinsics=intrinsics, extrinsics=extrinsics + data=depth_np_frame, timestamp=cpp_frameset.timestamp ), ) frame = Frame(camera=cameraframe, avg_timestamp=cpp_frameset.timestamp) @@ -111,6 +108,12 @@ def _extrinsics(self, camera_name) -> np.ndarray[tuple[Literal[4, 4]], np.dtype[ return cam.inverse().pose_matrix() + def get_calibration(self, camera_name) -> Calibration: + return Calibration(intrinsics=self._intrinsics(camera_name), extrinsics=self._extrinsics(camera_name)) + + def calibrate(self, camera_name) -> bool: + return True + def config(self, camera_name: str) -> SimCameraConfig: """Should return the configuration of the camera with the given name.""" return self.cameras[camera_name] From 3b2194544dc539eff2a5396dd54e65a756b73e86 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20J=C3=BClg?= Date: Wed, 9 Jul 2025 09:56:54 +0200 Subject: [PATCH 04/13] refactor(camera): calibration interface change - changed the interface for hardware calibration - added realsense specific calibration code --- .../rcs_realsense/src/rcs_realsense/camera.py | 43 ++++++++++++++++--- python/rcs/camera/hw.py | 23 ++++++---- python/rcs/camera/interface.py | 8 ++-- python/rcs/camera/sim.py | 9 ++-- 4 files changed, 61 insertions(+), 22 deletions(-) diff --git a/extensions/rcs_realsense/src/rcs_realsense/camera.py b/extensions/rcs_realsense/src/rcs_realsense/camera.py index 30f3fcf0..aa39b715 100644 --- a/extensions/rcs_realsense/src/rcs_realsense/camera.py +++ b/extensions/rcs_realsense/src/rcs_realsense/camera.py @@ -5,6 +5,7 @@ import numpy as np import pyrealsense2 as rs +from rcs.camera.interface import Calibration from rcs.camera.hw import HardwareCamera from rcs.camera.interface import CameraFrame, DataFrame, Frame, IMUFrame @@ -22,10 +23,12 @@ class RealSenseDevicePipeline: pipeline: rs.pipeline pipeline_profile: rs.pipeline_profile camera: RealSenseDeviceInfo + depth_scale: float | None = None class RealSenseCameraSet(HardwareCamera): TIMESTAMP_FACTOR = 1e-3 + DEPTH_SCALE = 10000 def __init__( self, @@ -41,6 +44,7 @@ def __init__( self.enable_imu = enable_imu self.cameras = cameras self._logger = logging.getLogger(__name__) + # TODO: safe the last 30 recorded frames self._last_frameset: dict[rs.stream, rs.frame] | None = None self._frameset_lock = threading.Lock() assert ( @@ -168,7 +172,9 @@ def enable_device(self, device_info: RealSenseDeviceInfo, enable_ir_emitter: boo if sensor.supports(rs.option.emitter_enabled): sensor.set_option(rs.option.emitter_enabled, 1 if enable_ir_emitter else 0) sensor.set_option(rs.option.laser_power, self.laser_power) - self._enabled_devices[device_info.serial] = RealSenseDevicePipeline(pipeline, pipeline_profile, device_info) + self._enabled_devices[device_info.serial] = RealSenseDevicePipeline( + pipeline, pipeline_profile, device_info, depth_scale=sensor.get_depth_scale() + ) self._logger.debug("Enabled device %s (%s)", device_info.serial, device_info.product_line) @staticmethod @@ -255,7 +261,12 @@ def to_ts(frame: rs.frame) -> float: timestamps.append(to_ts(frame)) assert color is not None, "Color frame not found" - cf = CameraFrame(color=color, ir=ir, depth=depth) + # TODO: check depth type (should be int16) + cf = CameraFrame( + color=color, + ir=ir, + depth=(depth.astype(np.float64) * device.depth_scale / self.DEPTH_SCALE).astype(np.int16), + ) imu = IMUFrame(accel=accel, gyro=gyro) return Frame(camera=cf, imu=imu, avg_timestamp=float(np.mean(timestamps)) if len(timestamps) > 0 else None) @@ -303,7 +314,7 @@ def get_device_intrinsics( device_intrinsics[device_name][key] = value.get_profile().as_video_stream_profile().get_intrinsics() return device_intrinsics - def get_depth_to_color_extrinsics(self, frames): + def get_depth_to_color_extrinsics(self, frames: dict[rs.stream, rs.frame]): """ Get the extrinsics between the depth imager 1 and the color imager using its frame delivered by the realsense device @@ -353,7 +364,25 @@ def enable_emitter(self, enable_ir_emitter=True): if enable_ir_emitter: sensor.set_option(rs.option.laser_power, self.laser_power) - - def intrinsics(self, camera_name: str) -> np.ndarray[tuple[typing.Literal[3, 4]], np.dtype[np.float64]]: - with self._frameset_lock: - pass + def calibrate(self) -> dict[str, Calibration] | None: + + intrinsics = {} + for camera_name, camera_config in self.cameras: + serial = camera_config.identifier + device = self._enabled_devices[serial] + vp = device.pipeline_profile.get_stream(rs.stream.color).as_video_stream_profile() + intrinsics[camera_name] = vp.get_intrinsics() + + extrinsics = {} + for camera_name, camera_config in self.cameras: + serial = camera_config.identifier + device = self._enabled_devices[serial] + depth_vp = device.pipeline_profile.get_stream(rs.stream.depth).as_video_stream_profile() + color_vp = device.pipeline_profile.get_stream(rs.stream.color).as_video_stream_profile() + depth_to_color = depth_vp.get_extrinsics_to(color_vp) + + # calibrate strategy: save the last 30 frames and send it + # it can + # - ask for more new frames + # - calibration fail + # - calibration successful diff --git a/python/rcs/camera/hw.py b/python/rcs/camera/hw.py index 01fe875c..4f6b66bd 100644 --- a/python/rcs/camera/hw.py +++ b/python/rcs/camera/hw.py @@ -35,7 +35,7 @@ def poll_frame(self, camera_name: str) -> Frame: def camera_names(self) -> list[str]: """Returns the names of the cameras in this set.""" - def intrinsics(self, camera_name: str) -> np.ndarray[tuple[typing.Literal[3, 4]], np.dtype[np.float64]] + def calibrate(self) -> dict[str, Calibration] | None: """Returns camera intrinsics""" @@ -63,6 +63,7 @@ def __init__( self._next_ring_index = 0 self._buffer_len = 0 self.writer: dict[str, cv2.VideoWriter] = {} + self._calibration: dict[str, Calibration] | None = None @property def camera_names(self) -> list[str]: @@ -181,14 +182,20 @@ def warm_up(self): self.poll_frame(camera_name) self.rate_limiter() - def get_calibration(self, camera_name) -> Calibration: - # get intrinsics from the camera and check that extrinsics had been calibrated - pass + def get_calibration(self) -> dict[str, Calibration] | None: + return self._calibration + + def calibrate(self) -> bool: + self._calibration = None + cal = {} + for camera in self.cameras: + c = camera.calibrate() + if c is None: + return False + cal.update(cal) + self._calibration = cal + return True - def calibrate(self, camera_name) -> bool: - # calibrate extrinsics with aruca marker - # lets provide a calibration function - pass def polling_thread(self, warm_up: bool = True): for camera in self.cameras: diff --git a/python/rcs/camera/interface.py b/python/rcs/camera/interface.py index bc501239..9f3b5ae5 100644 --- a/python/rcs/camera/interface.py +++ b/python/rcs/camera/interface.py @@ -73,11 +73,11 @@ def close(self): def config(self, camera_name: str) -> BaseCameraConfig: """Returns the configuration object of the cameras.""" - def get_calibration(self, camera_name) -> Calibration: - """Returns camera calibration""" + def get_calibration(self) -> dict[str, Calibration] | None: + """Returns cameras' calibration. None if not calibrated or previous calibration failed.""" - def calibrate(self, camera_name) -> bool: - """Calibrates the camera""" + def calibrate(self) -> bool: + """Calibrates the cameras. Returns calibration success""" @property def camera_names(self) -> list[str]: diff --git a/python/rcs/camera/sim.py b/python/rcs/camera/sim.py index 0029a15a..2488dd3c 100644 --- a/python/rcs/camera/sim.py +++ b/python/rcs/camera/sim.py @@ -108,10 +108,13 @@ def _extrinsics(self, camera_name) -> np.ndarray[tuple[Literal[4, 4]], np.dtype[ return cam.inverse().pose_matrix() - def get_calibration(self, camera_name) -> Calibration: - return Calibration(intrinsics=self._intrinsics(camera_name), extrinsics=self._extrinsics(camera_name)) + def get_calibration(self) -> dict[str, Calibration]: + return { + camera_name: Calibration(intrinsics=self._intrinsics(camera_name), extrinsics=self._extrinsics(camera_name)) + for camera_name in self.camera_names + } - def calibrate(self, camera_name) -> bool: + def calibrate(self) -> bool: return True def config(self, camera_name: str) -> SimCameraConfig: From 2c03fb0e78d0735d252f4ce4748d06c9ddd35a45 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20J=C3=BClg?= Date: Wed, 9 Jul 2025 14:40:47 +0200 Subject: [PATCH 05/13] feat(env): allow nested space types --- python/rcs/envs/space_utils.py | 5 ++ python/tests/test_envs.py | 134 +++++++++++++++++++++++++++++++++ 2 files changed, 139 insertions(+) diff --git a/python/rcs/envs/space_utils.py b/python/rcs/envs/space_utils.py index 537bdf88..40c316d1 100644 --- a/python/rcs/envs/space_utils.py +++ b/python/rcs/envs/space_utils.py @@ -200,6 +200,11 @@ def value(t, path=""): {key: value(get_args(t)[1], f"{path}/{key}") for key in child_dict_keys_to_unfold[unfold_key]} ) + if not hasattr(t, "__metadata__"): + return gym.spaces.Dict( + {name: value(sub_t, path) for name, sub_t in get_type_hints(t, include_extras=True).items()} + ) + if len(t.__metadata__) == 2 and callable(t.__metadata__[0]): # space can be parametrized and is a function assert params is not None, "No params given." diff --git a/python/tests/test_envs.py b/python/tests/test_envs.py index 096c2e47..0ade40bb 100644 --- a/python/tests/test_envs.py +++ b/python/tests/test_envs.py @@ -39,6 +39,55 @@ class SimpleNestedSpace(RCSpaceType): ] +class SimpleTypeNestedSpace(RCSpaceType): + robots_joints: dict[ + Annotated[str, "robots"], + SimpleSpace, + ] + + +class CameraSpace(RCSpaceType): + data: Annotated[ + np.ndarray, + # needs to be filled with values downstream + lambda height, width, color_dim=3, dtype=np.uint8, low=0, high=255: gym.spaces.Box( + low=low, + high=high, + shape=(height, width, color_dim), + dtype=dtype, + ), + "frame", + ] + intrinsics: Annotated[ + np.ndarray, + gym.spaces.Box( + low=-np.inf, + high=np.inf, + shape=(3, 4), + dtype=np.float64, + ), + ] + extrinsics: Annotated[ + np.ndarray, + gym.spaces.Box( + low=-np.inf, + high=np.inf, + shape=(4, 4), + dtype=np.float64, + ), + ] + + +class AdvancedTypeNestedSpace(RCSpaceType): + frames: dict[ + Annotated[str, "camera_names"], + dict[ + Annotated[str, "camera_type"], # "rgb" or "depth" + CameraSpace, + ], + ] + + class AdvancedNestedSpace(RCSpaceType): frames: dict[ Annotated[str, "cams"], @@ -105,6 +154,91 @@ def test_simple_nested_space(self): } ) + def test_simple_type_nested_space(self): + assert get_space(SimpleTypeNestedSpace, child_dict_keys_to_unfold={"robots": ["robot1"]}) == gym.spaces.Dict( + { + "robots_joints": gym.spaces.Dict( + { + "robot1": gym.spaces.Dict( + { + "my_int": gym.spaces.Discrete(1), + "my_float": gym.spaces.Box(low=0, high=1, shape=(1,), dtype=np.float32), + } + ), + } + ), + } + ) + + def test_advanced_type_nested_space(self): + assert get_space( + AdvancedTypeNestedSpace, + child_dict_keys_to_unfold={"camera_names": ["cam1"], "camera_type": ["depth", "rgb"]}, + params={ + "/cam1/rgb/frame": { + "height": 480, + "width": 640, + "dtype": np.uint8, + "low": 0, + "high": 255, + "color_dim": 3, + }, + "/cam1/depth/frame": { + "height": 480, + "width": 640, + "dtype": np.uint16, + "low": 0, + "high": 65535, + "color_dim": 1, + }, + }, + ) == gym.spaces.Dict( + { + "frames": gym.spaces.Dict( + { + "cam1": gym.spaces.Dict( + { + "depth": gym.spaces.Dict( + { + "data": gym.spaces.Box(low=0, high=65535, shape=(480, 640, 1), dtype=np.uint16), + "intrinsics": gym.spaces.Box( + low=-np.inf, + high=np.inf, + shape=(3, 4), + dtype=np.float64, + ), + "extrinsics": gym.spaces.Box( + low=-np.inf, + high=np.inf, + shape=(4, 4), + dtype=np.float64, + ), + } + ), + "rgb": gym.spaces.Dict( + { + "data": gym.spaces.Box(low=0, high=255, shape=(480, 640, 3), dtype=np.uint16), + "intrinsics": gym.spaces.Box( + low=-np.inf, + high=np.inf, + shape=(3, 4), + dtype=np.float64, + ), + "extrinsics": gym.spaces.Box( + low=-np.inf, + high=np.inf, + shape=(4, 4), + dtype=np.float64, + ), + } + ), + } + ), + } + ), + } + ) + def test_advanced_nested_space(self): assert get_space( From a47d80e2371e1aca6fa9d89dc1b5969598e42c29 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20J=C3=BClg?= Date: Thu, 10 Jul 2025 11:52:07 +0200 Subject: [PATCH 06/13] feat: full calibration support - calibration interface - calibration data added to observation - intrinsic calibration for sim and realsense - implemented april tag calibration --- extensions/rcs_realsense/pyproject.toml | 1 + .../src/rcs_realsense/calibration.py | 152 ++++++++++++++ .../rcs_realsense/src/rcs_realsense/camera.py | 194 +++++++----------- .../rcs_realsense/src/rcs_realsense/utils.py | 4 +- python/rcs/camera/hw.py | 56 ++++- python/rcs/camera/interface.py | 14 +- python/rcs/camera/sim.py | 18 +- python/rcs/envs/base.py | 81 +++++--- 8 files changed, 331 insertions(+), 189 deletions(-) create mode 100644 extensions/rcs_realsense/src/rcs_realsense/calibration.py diff --git a/extensions/rcs_realsense/pyproject.toml b/extensions/rcs_realsense/pyproject.toml index ac3e8067..e44eba50 100644 --- a/extensions/rcs_realsense/pyproject.toml +++ b/extensions/rcs_realsense/pyproject.toml @@ -9,6 +9,7 @@ description="RCS realsense module" dependencies = [ "rcs==0.4.0", "pyrealsense2~=2.55.1", + "apriltag==0.0.16", ] readme = "README.md" maintainers = [ diff --git a/extensions/rcs_realsense/src/rcs_realsense/calibration.py b/extensions/rcs_realsense/src/rcs_realsense/calibration.py new file mode 100644 index 00000000..3acce7ea --- /dev/null +++ b/extensions/rcs_realsense/src/rcs_realsense/calibration.py @@ -0,0 +1,152 @@ +import logging +import typing +from queue import Queue +from time import sleep + +import apriltag +import cv2 +import numpy as np +from rcs.camera.hw import CalibrationStrategy +from rcs.camera.interface import Frame +from tqdm import tqdm + +logger = logging.getLogger(__name__) + + +class FR3BaseArucoCalibration(CalibrationStrategy): + """Calibration with a 3D printed aruco marker that fits around the vention's FR3 base mounting plate.""" + + def __init__(self, camera_name: str): + # base frame to camera, world to base frame + self._extrinsics: np.ndarray[tuple[typing.Literal[4], typing.Literal[4]], np.dtype[np.float64]] | None = None + self.camera_name = camera_name + self.world_to_tag = np.array( + [ + [0, -1, 0, 0.2], + [-1, 0, 0, 0], + [0, 0, -1, 0], + [0, 0, 0, 1], + ] + ) + + def calibrate( + self, + samples: Queue[Frame], + intrinsics: np.ndarray[tuple[typing.Literal[3], typing.Literal[4]], np.dtype[np.float64]], + ) -> bool: + logger.info("Calibrating camera %s. Position it as you wish and press enter.", self.camera_name) + input() + tries = 3 + while samples.qsize() < samples.maxsize - 1 and tries > 0: + logger.info("not enought frames in recorded, waiting 2 seconds...") + tries = -1 + sleep(2) + if tries == 0: + logger.warning("Calibration failed, not enough frames arrived.") + return False + frames = [] + for _ in samples.qsize(): + frames.append(samples.get()) + + _, cam_to_tag, _ = get_average_marker_pose( + frames, intrinsics=intrinsics, calib_tag_id=9, show_live_window=False + ) + + world_to_cam = self.world_to_tag @ np.linalg.inv(cam_to_tag) + self._extrinsics = world_to_cam + return True + + def get_extrinsics(self) -> np.ndarray[tuple[typing.Literal[4], typing.Literal[4]], np.dtype[np.float64]] | None: + return self._extrinsics + + +def get_average_marker_pose( + samples, + intrinsics, + calib_tag_id, + show_live_window, +): + # create detector + options = apriltag.DetectorOptions(families="tag25h9") + detector = apriltag.Detector(options=options) + + # make while loop with tqdm + poses = [] + + for frame in tqdm(samples): + + # detect tags + marker_det, pose = get_marker_pose(calib_tag_id, detector, intrinsics, frame) + + if marker_det is None: + continue + + for corner in marker_det.corners: + corner = corner.astype(int) + cv2.circle(frame, tuple(corner), 5, (0, 0, 255), -1) + + poses.append(pose) + + last_frame = frame.copy() + + camera_matrix = intrinsics + + if show_live_window: + cv2.drawFrameAxes(frame, camera_matrix, None, pose[:3, :3], pose[:3, 3], 0.1) + # show frame + cv2.imshow("frame", frame) + + # wait for key press + if cv2.waitKey(1) & 0xFF == ord("q"): + break + + if show_live_window: + cv2.destroyAllWindows() + + # calculate the average marker pose + poses = np.array(poses) + avg_pose = np.mean(poses, axis=0) + logger.info(f"Average pose: {avg_pose}") + + # paint avg pose on last frame + if show_live_window: + cv2.drawFrameAxes(last_frame, camera_matrix, None, avg_pose[:3, :3], avg_pose[:3, 3], 0.1) # type: ignore + return last_frame, avg_pose, camera_matrix + + +def get_marker_pose(calib_tag_id, detector, intrinsics, frame): + gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY) + detections = detector.detect(gray) + + # count detections + n_det = 0 + marker_det = None + for det in detections: + if det.tag_id != calib_tag_id: + continue + n_det += 1 + marker_det = det + + if n_det > 1: + raise ValueError("Expected 1 detection of tag id " f"{calib_tag_id}, got {n_det}.") + + if marker_det is None: + return None, None + + fx = intrinsics[0, 0] + fy = intrinsics[1, 1] + cx = intrinsics[0, 2] + cy = intrinsics[1, 2] + + pose, _, _ = detector.detection_pose( + marker_det, + camera_params=( + fx, + fy, + cx, + cy, + ), + tag_size=0.1, + ) + + return marker_det, pose diff --git a/extensions/rcs_realsense/src/rcs_realsense/camera.py b/extensions/rcs_realsense/src/rcs_realsense/camera.py index aa39b715..411d20f1 100644 --- a/extensions/rcs_realsense/src/rcs_realsense/camera.py +++ b/extensions/rcs_realsense/src/rcs_realsense/camera.py @@ -1,13 +1,12 @@ import logging -from dataclasses import dataclass -import threading +import queue import typing +from dataclasses import dataclass import numpy as np import pyrealsense2 as rs -from rcs.camera.interface import Calibration -from rcs.camera.hw import HardwareCamera -from rcs.camera.interface import CameraFrame, DataFrame, Frame, IMUFrame +from rcs.camera.hw import CalibrationStrategy, DummyCalibrationStrategy, HardwareCamera +from rcs.camera.interface import BaseCameraSet, CameraFrame, DataFrame, Frame, IMUFrame from rcs import common @@ -24,15 +23,19 @@ class RealSenseDevicePipeline: pipeline_profile: rs.pipeline_profile camera: RealSenseDeviceInfo depth_scale: float | None = None + color_intrinsics: np.ndarray[tuple[typing.Literal[3], typing.Literal[4]], np.dtype[np.float64]] | None = None + depth_intrinsics: np.ndarray[tuple[typing.Literal[3], typing.Literal[4]], np.dtype[np.float64]] | None = None + depth_to_color: common.Pose | None = None class RealSenseCameraSet(HardwareCamera): TIMESTAMP_FACTOR = 1e-3 - DEPTH_SCALE = 10000 + CALIBRATION_FRAME_SIZE = 30 def __init__( self, cameras: dict[str, common.BaseCameraConfig], + calibration_strategy: dict[str, CalibrationStrategy] | None = None, enable_ir_emitter: bool = False, enable_ir: bool = False, laser_power: int = 330, @@ -43,10 +46,10 @@ def __init__( self.laser_power = laser_power self.enable_imu = enable_imu self.cameras = cameras + if calibration_strategy is None: + calibration_strategy = {camera_name: DummyCalibrationStrategy() for camera_name in cameras.keys()} + self.calibration_strategy = calibration_strategy self._logger = logging.getLogger(__name__) - # TODO: safe the last 30 recorded frames - self._last_frameset: dict[rs.stream, rs.frame] | None = None - self._frameset_lock = threading.Lock() assert ( len({camera.resolution_width for camera in self.cameras.values()}) == 1 and len({camera.resolution_height for camera in self.cameras.values()}) == 1 @@ -56,6 +59,7 @@ def __init__( self.resolution_width = sample_camera_config.resolution_width self.resolution_height = sample_camera_config.resolution_height self.frame_rate = sample_camera_config.frame_rate + self._frame_buffer: dict[str, queue.Queue] = {} self.D400_config = rs.config() self.D400_config.enable_stream( @@ -143,9 +147,9 @@ def enable_devices(self, devices_to_enable: dict[str, str], enable_ir_emitter: b assert ( device_serial in self._available_devices ), f"Device {device_name} not found. Check if it is connected." - self.enable_device(self._available_devices[device_serial], enable_ir_emitter) + self.enable_device(device_name, self._available_devices[device_serial], enable_ir_emitter) - def enable_device(self, device_info: RealSenseDeviceInfo, enable_ir_emitter: bool = False): + def enable_device(self, camera_name: str, device_info: RealSenseDeviceInfo, enable_ir_emitter: bool = False): """ Enable an Intel RealSense Device @@ -172,9 +176,36 @@ def enable_device(self, device_info: RealSenseDeviceInfo, enable_ir_emitter: boo if sensor.supports(rs.option.emitter_enabled): sensor.set_option(rs.option.emitter_enabled, 1 if enable_ir_emitter else 0) sensor.set_option(rs.option.laser_power, self.laser_power) - self._enabled_devices[device_info.serial] = RealSenseDevicePipeline( - pipeline, pipeline_profile, device_info, depth_scale=sensor.get_depth_scale() + + depth_vp = pipeline_profile.get_stream(rs.stream.depth).as_video_stream_profile() + color_vp = pipeline_profile.get_stream(rs.stream.color).as_video_stream_profile() + + rs_color_intrinsics = color_vp.get_intrinsics() + color_intrinsics = np.array([ + [rs_color_intrinsics.fx, 0, (rs_color_intrinsics.width-1)/2, 0], + [0, rs_color_intrinsics.fy, (rs_color_intrinsics.height-1)/2, 0], + [0, 0, 1, 0], + ]) + rs_depth_intrinsics = depth_vp.get_intrinsics() + depth_intrinsics = np.array([ + [rs_depth_intrinsics.fx, 0, (rs_depth_intrinsics.width-1)/2, 0], + [0, rs_depth_intrinsics.fy, (rs_depth_intrinsics.height-1)/2, 0], + [0, 0, 1, 0], + ]) + + depth_to_color = depth_vp.get_extrinsics_to(color_vp) + + self._enabled_devices[camera_name] = RealSenseDevicePipeline( + pipeline, + pipeline_profile, + device_info, + depth_scale=sensor.get_depth_scale(), + color_intrinsics=color_intrinsics, + depth_intrinsics=depth_intrinsics, + depth_to_color=common.Pose(translation=depth_to_color.translation, rotation=np.array(depth_to_color.rotation).reshape(3, 3)), ) + + self._frame_buffer[camera_name] = queue.Queue(maxsize=self.CALIBRATION_FRAME_SIZE) self._logger.debug("Enabled device %s (%s)", device_info.serial, device_info.product_line) @staticmethod @@ -205,22 +236,11 @@ def enumerate_connected_devices(context: rs.context) -> dict[str, RealSenseDevic return connect_device def poll_frame(self, camera_name: str) -> Frame: - # TODO(juelg): polling should be performed in a recorder thread - # (anyway needed to record trajectory data to disk) - # and this method should just access the latest frame from the recorder - # Perhaps there even has to be a separate recorder thread for each camera - - # TODO(juelg): what is a "pose" (in frame) and how can we use it - # TODO(juelg): decide whether to use the poll method and to wait if devices get ready assert camera_name in self.camera_names, f"Camera {camera_name} not found in the enabled devices" - serial = self.cameras[camera_name].identifier - device = self._enabled_devices[serial] + device = self._enabled_devices[camera_name] streams = device.pipeline_profile.get_streams() frameset = device.pipeline.wait_for_frames() - # frameset = device.pipeline.poll_for_frames() - with self._frameset_lock: - self._last_frameset = frameset color: DataFrame | None = None ir: DataFrame | None = None @@ -235,6 +255,10 @@ def to_ts(frame: rs.frame) -> float: # convert to seconds return frame.get_timestamp() * RealSenseCameraSet.TIMESTAMP_FACTOR + color_extrinsics = self.calibration_strategy[camera_name].get_extrinsics() + depth_to_color = device.depth_to_color + depth_extrinsics = color_extrinsics @ depth_to_color.inverse().pose_matrix() if color_extrinsics is not None else None + timestamps = [] for stream in streams: if rs.stream.infrared == stream.stream_type(): @@ -242,10 +266,20 @@ def to_ts(frame: rs.frame) -> float: ir = DataFrame(data=to_numpy(frame), timestamp=to_ts(frame)) elif rs.stream.color == stream.stream_type(): frame = frameset.get_color_frame() - color = DataFrame(data=to_numpy(frame)[:, :, ::-1], timestamp=to_ts(frame)) + color = DataFrame( + data=to_numpy(frame)[:, :, ::-1], + timestamp=to_ts(frame), + intrinsics=device.color_intrinsics, + extrinsics=color_extrinsics, + ) elif rs.stream.depth == stream.stream_type(): frame = frameset.get_depth_frame() - depth = DataFrame(data=to_numpy(frame), timestamp=to_ts(frame)) + depth = DataFrame( + data=(to_numpy(frame).astype(np.float64) * device.depth_scale / BaseCameraSet.DEPTH_SCALE).astype(np.int16), + timestamp=to_ts(frame), + intrinsics=device.depth_intrinsics, + extrinsics=depth_extrinsics, + ) elif rs.stream.accel == stream.stream_type(): frame = frameset.first(stream.stream_index()) md = frame.as_motion_frame().get_motion_data() @@ -261,86 +295,15 @@ def to_ts(frame: rs.frame) -> float: timestamps.append(to_ts(frame)) assert color is not None, "Color frame not found" - # TODO: check depth type (should be int16) cf = CameraFrame( color=color, ir=ir, - depth=(depth.astype(np.float64) * device.depth_scale / self.DEPTH_SCALE).astype(np.int16), + depth=depth, ) imu = IMUFrame(accel=accel, gyro=gyro) - return Frame(camera=cf, imu=imu, avg_timestamp=float(np.mean(timestamps)) if len(timestamps) > 0 else None) - - def get_depth_shape(self): - """ - Retruns width and height of the depth stream for one arbitrary device - - Returns: - ----------- - width : int - height: int - """ - width = -1 - height = -1 - for device in self._enabled_devices.values(): - for stream in device.pipeline_profile.get_streams(): - if rs.stream.depth == stream.stream_type(): - width = stream.as_video_stream_profile().width() - height = stream.as_video_stream_profile().height() - return width, height - - def get_device_intrinsics( - self, frames: dict[str, dict[rs.stream, rs.frame]] - ) -> dict[str, dict[rs.stream, rs.intrinsics]]: - """ - Get the intrinsics of the imager using its frame delivered by the realsense device - - Parameters: - ----------- - frames : rs::frame - The frame grabbed from the imager inside the Intel RealSense for which the intrinsic is needed - - Return: - ----------- - device_intrinsics : dict - keys : serial - Serial number of the device - values: [key] - Intrinsics of the corresponding device - """ - device_intrinsics: dict[str, dict[rs.stream, rs.intrinsics]] = {} - for device_name, frameset in frames.items(): - device_intrinsics[device_name] = {} - for key, value in frameset.items(): - device_intrinsics[device_name][key] = value.get_profile().as_video_stream_profile().get_intrinsics() - return device_intrinsics - - def get_depth_to_color_extrinsics(self, frames: dict[rs.stream, rs.frame]): - """ - Get the extrinsics between the depth imager 1 and the color imager using its frame delivered by the realsense device - - Parameters: - ----------- - frames : rs::frame - The frame grabbed from the imager inside the Intel RealSense for which the intrinsic is needed - - Return: - ----------- - device_intrinsics : dict - keys : serial - Serial number of the device - values: [key] - Extrinsics of the corresponding device - """ - device_extrinsics = {} - for dev_info, frameset in frames.items(): - serial = dev_info[0] - device_extrinsics[serial] = ( - frameset[rs.stream.depth] - .get_profile() - .as_video_stream_profile() - .get_extrinsics_to(frameset[rs.stream.color].get_profile()) - ) - return device_extrinsics + f = Frame(camera=cf, imu=imu, avg_timestamp=float(np.mean(timestamps)) if len(timestamps) > 0 else None) + self._frame_buffer[camera_name].put(f) + return f def disable_streams(self): self.D400_config.disable_all_streams() @@ -364,25 +327,8 @@ def enable_emitter(self, enable_ir_emitter=True): if enable_ir_emitter: sensor.set_option(rs.option.laser_power, self.laser_power) - def calibrate(self) -> dict[str, Calibration] | None: - - intrinsics = {} - for camera_name, camera_config in self.cameras: - serial = camera_config.identifier - device = self._enabled_devices[serial] - vp = device.pipeline_profile.get_stream(rs.stream.color).as_video_stream_profile() - intrinsics[camera_name] = vp.get_intrinsics() - - extrinsics = {} - for camera_name, camera_config in self.cameras: - serial = camera_config.identifier - device = self._enabled_devices[serial] - depth_vp = device.pipeline_profile.get_stream(rs.stream.depth).as_video_stream_profile() - color_vp = device.pipeline_profile.get_stream(rs.stream.color).as_video_stream_profile() - depth_to_color = depth_vp.get_extrinsics_to(color_vp) - - # calibrate strategy: save the last 30 frames and send it - # it can - # - ask for more new frames - # - calibration fail - # - calibration successful + def calibrate(self) -> bool: + for camera_name in self.cameras: + self.calibration_strategy[camera_name].calibrate( + intrinsics=self._enabled_devices[camera_name].color_intrinsics, samples=self._frame_buffer + ) diff --git a/extensions/rcs_realsense/src/rcs_realsense/utils.py b/extensions/rcs_realsense/src/rcs_realsense/utils.py index 372dc04d..8cb1efa4 100644 --- a/extensions/rcs_realsense/src/rcs_realsense/utils.py +++ b/extensions/rcs_realsense/src/rcs_realsense/utils.py @@ -1,3 +1,4 @@ +from rcs_realsense.calibration import FR3BaseArucoCalibration from rcs_realsense.camera import RealSenseCameraSet from rcs import common @@ -10,4 +11,5 @@ def default_realsense(name2id: dict[str, str] | None) -> RealSenseCameraSet | No name: common.BaseCameraConfig(identifier=id, resolution_width=1280, resolution_height=720, frame_rate=30) for name, id in name2id.items() } - return RealSenseCameraSet(cameras=cameras) + calibration_strategy = {name: FR3BaseArucoCalibration(name) for name in name2id} + return RealSenseCameraSet(cameras=cameras, calibration_strategy=calibration_strategy) diff --git a/python/rcs/camera/hw.py b/python/rcs/camera/hw.py index 4f6b66bd..de017fc0 100644 --- a/python/rcs/camera/hw.py +++ b/python/rcs/camera/hw.py @@ -4,12 +4,13 @@ from collections.abc import Sequence from datetime import datetime from pathlib import Path +from queue import Queue from time import sleep import cv2 import numpy as np from rcs._core.common import BaseCameraConfig -from rcs.camera.interface import BaseCameraSet, Calibration, Frame, FrameSet +from rcs.camera.interface import BaseCameraSet, Frame, FrameSet from rcs.utils import SimpleFrameRate @@ -35,10 +36,46 @@ def poll_frame(self, camera_name: str) -> Frame: def camera_names(self) -> list[str]: """Returns the names of the cameras in this set.""" - def calibrate(self) -> dict[str, Calibration] | None: + def calibrate(self) -> bool: """Returns camera intrinsics""" +N = typing.TypeVar("N", bound=int) +H = typing.TypeVar("H", bound=int) +W = typing.TypeVar("W", bound=int) + + +class CalibrationStrategy(typing.Protocol): + """Implementation the hardware dependend calibration strategy.""" + + def calibrate( + self, + samples: Queue[Frame], + intrinsics: np.ndarray[tuple[typing.Literal[3], typing.Literal[4]], np.dtype[np.float64]], + ) -> bool: + """Implements algorithm to calibrate the camera.""" + + def get_extrinsics(self) -> np.ndarray[tuple[typing.Literal[4], typing.Literal[4]], np.dtype[np.float64]] | None: + """ + Returns the calibrated extrinsic, can also be cached. If not calibrated then it returns None. + It is urged to perform efficient caching for this method as it is called in each step. + """ + + +class DummyCalibrationStrategy(CalibrationStrategy): + """Always returns identity extrinsics.""" + + def calibrate( + self, + samples: Queue[Frame], + intrinsics: np.ndarray[tuple[typing.Literal[3], typing.Literal[4]], np.dtype[np.float64]], + ) -> bool: + return True + + def get_extrinsics(self) -> np.ndarray[tuple[typing.Literal[4], typing.Literal[4]], np.dtype[np.float64]] | None: + return np.eye(4) + + class HardwareCameraSet(BaseCameraSet): """This base class polls in a separate thread for all cameras and stores them in a buffer. @@ -63,7 +100,6 @@ def __init__( self._next_ring_index = 0 self._buffer_len = 0 self.writer: dict[str, cv2.VideoWriter] = {} - self._calibration: dict[str, Calibration] | None = None @property def camera_names(self) -> list[str]: @@ -182,21 +218,13 @@ def warm_up(self): self.poll_frame(camera_name) self.rate_limiter() - def get_calibration(self) -> dict[str, Calibration] | None: - return self._calibration - def calibrate(self) -> bool: - self._calibration = None - cal = {} for camera in self.cameras: c = camera.calibrate() if c is None: return False - cal.update(cal) - self._calibration = cal return True - def polling_thread(self, warm_up: bool = True): for camera in self.cameras: camera.open() @@ -240,3 +268,9 @@ def config(self, camera_name: str) -> BaseCameraConfig: def poll_frame(self, camera_name: str) -> Frame: return self.camera_dict[camera_name].poll_frame(camera_name) + + def __enter__(self): + pass + + def __exit__(self, **kwargs): + self.close() diff --git a/python/rcs/camera/interface.py b/python/rcs/camera/interface.py index 9f3b5ae5..5d5c75c2 100644 --- a/python/rcs/camera/interface.py +++ b/python/rcs/camera/interface.py @@ -9,19 +9,14 @@ logger = logging.getLogger(__name__) logger.setLevel(logging.INFO) -@dataclass(kw_only=True) -class Calibration: - intrinsics: np.ndarray[tuple[Literal[3, 4]], np.dtype[np.float64]] | None = None - extrinsics: np.ndarray[tuple[Literal[4, 4]], np.dtype[np.float64]] | None = None - @dataclass(kw_only=True) class DataFrame: data: Any # timestamp in posix time timestamp: float | None = None - intrinsics: np.ndarray[tuple[Literal[3, 4]], np.dtype[np.float64]] | None = None - extrinsics: np.ndarray[tuple[Literal[4, 4]], np.dtype[np.float64]] | None = None + intrinsics: np.ndarray[tuple[Literal[3], Literal[4]], np.dtype[np.float64]] | None = None + extrinsics: np.ndarray[tuple[Literal[4], Literal[4]], np.dtype[np.float64]] | None = None @dataclass(kw_only=True) @@ -55,6 +50,8 @@ class FrameSet: class BaseCameraSet(Protocol): """Interface for a set of cameras for sim and hardware""" + DEPTH_SCALE = 1000 + def buffer_size(self) -> int: """Returns size of the internal buffer.""" @@ -73,9 +70,6 @@ def close(self): def config(self, camera_name: str) -> BaseCameraConfig: """Returns the configuration object of the cameras.""" - def get_calibration(self) -> dict[str, Calibration] | None: - """Returns cameras' calibration. None if not calibrated or previous calibration failed.""" - def calibrate(self) -> bool: """Calibrates the cameras. Returns calibration success""" diff --git a/python/rcs/camera/sim.py b/python/rcs/camera/sim.py index 2488dd3c..ac96bf92 100644 --- a/python/rcs/camera/sim.py +++ b/python/rcs/camera/sim.py @@ -10,7 +10,7 @@ from rcs._core.sim import FrameSet as _FrameSet from rcs._core.sim import SimCameraConfig from rcs._core.sim import SimCameraSet as _SimCameraSet -from rcs.camera.interface import Calibration, CameraFrame, DataFrame, Frame, FrameSet +from rcs.camera.interface import CameraFrame, DataFrame, Frame, FrameSet from rcs import sim @@ -74,10 +74,16 @@ def _cpp_to_python_frames(self, cpp_frameset: _FrameSet | None) -> FrameSet | No cameraframe = CameraFrame( color=DataFrame( - data=color_np_frame, timestamp=cpp_frameset.timestamp + data=color_np_frame, + timestamp=cpp_frameset.timestamp, + intrinsics=self._intrinsics(color_name), + extrinsics=self._extrinsics(color_frame), ), depth=DataFrame( - data=depth_np_frame, timestamp=cpp_frameset.timestamp + data=depth_np_frame, + timestamp=cpp_frameset.timestamp, + intrinsics=self._intrinsics(depth_frame), + extrinsics=self._extrinsics(depth_frame), ), ) frame = Frame(camera=cameraframe, avg_timestamp=cpp_frameset.timestamp) @@ -108,12 +114,6 @@ def _extrinsics(self, camera_name) -> np.ndarray[tuple[Literal[4, 4]], np.dtype[ return cam.inverse().pose_matrix() - def get_calibration(self) -> dict[str, Calibration]: - return { - camera_name: Calibration(intrinsics=self._intrinsics(camera_name), extrinsics=self._extrinsics(camera_name)) - for camera_name in self.camera_names - } - def calibrate(self) -> bool: return True diff --git a/python/rcs/envs/base.py b/python/rcs/envs/base.py index 11370c03..c6855a35 100644 --- a/python/rcs/envs/base.py +++ b/python/rcs/envs/base.py @@ -118,39 +118,44 @@ class HandVecDictType(RCSpaceType): ] +class CameraDataDictType(RCSpaceType): + data: Annotated[ + np.ndarray, + # needs to be filled with values downstream + lambda height, width, color_dim=3, dtype=np.uint8, low=0, high=255: gym.spaces.Box( + low=low, + high=high, + shape=(height, width, color_dim), + dtype=dtype, + ), + "frame", + ] + intrinsics: Annotated[ + np.ndarray, + gym.spaces.Box( + low=-np.inf, + high=np.inf, + shape=(3, 4), + dtype=np.float64, + ), + ] + extrinsics: Annotated[ + np.ndarray, + gym.spaces.Box( + low=-np.inf, + high=np.inf, + shape=(4, 4), + dtype=np.float64, + ), + ] + + class CameraDictType(RCSpaceType): frames: dict[ Annotated[str, "camera_names"], dict[ Annotated[str, "camera_type"], # "rgb" or "depth" - Annotated[ - np.ndarray, - # needs to be filled with values downstream - lambda height, width, color_dim=3, dtype=np.uint8, low=0, high=255: gym.spaces.Box( - low=low, - high=high, - shape=(height, width, color_dim), - dtype=dtype, - ), - "frame", - ], - ], - ] - - -class DigitCameraDictType(RCSpaceType): - digit_frames: dict[ - Annotated[str, "camera_names"], - Annotated[ - np.ndarray, - # needs to be filled with values downstream - lambda height, width, color_dim=3, dtype=np.uint8, low=0, high=255: gym.spaces.Box( - low=low, - high=high, - shape=(height, width, color_dim), - dtype=dtype, - ), - "digit_frames", + CameraDataDictType, ], ] @@ -602,9 +607,9 @@ def __init__(self, env, camera_set: BaseCameraSet, include_depth: bool = False): "height": camera_set.config(name).resolution_height, "width": camera_set.config(name).resolution_width, "color_dim": 1, - "dtype": np.float32, - "low": 0.0, - "high": 1.0, + "dtype": np.uint16, + "low": -np.inf, + "high": np.inf, } for name in camera_set.camera_names } @@ -643,12 +648,20 @@ def check_depth(depth): frame_dict: dict[str, dict[str, np.ndarray]] = { camera_name: ( { - self.RGB_KEY: frame.camera.color.data, - self.DEPTH_KEY: frame.camera.depth.data, # type: ignore + self.RGB_KEY: CameraDataDictType( + data=frame.camera.color.data, + intrinsics=frame.camera.color.intrinsics, + extrinsics=frame.camera.color.extrinsics, + ), + self.DEPTH_KEY: CameraDataDictType(data=frame.camera.depth.data, intrinsics=frame.camera.depth.intrinsics, extrinsics=frame.camera.depth.extrinsics), # type: ignore } if check_depth(frame.camera.depth) else { - self.RGB_KEY: frame.camera.color.data, + self.RGB_KEY: CameraDataDictType( + data=frame.camera.color.data, + intrinsics=frame.camera.color.intrinsics, + extrinsics=frame.camera.color.extrinsics, + ), } ) for camera_name, frame in frameset.frames.items() From 479d5c5203cf1b784e14752117c05149b4fdfd0a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20J=C3=BClg?= Date: Thu, 10 Jul 2025 17:58:25 +0200 Subject: [PATCH 07/13] refactor: calibration with list instead of queue --- .../src/rcs_realsense/calibration.py | 39 ++++++------ .../rcs_realsense/src/rcs_realsense/camera.py | 62 +++++++++++++------ python/rcs/camera/hw.py | 19 ++++-- 3 files changed, 76 insertions(+), 44 deletions(-) diff --git a/extensions/rcs_realsense/src/rcs_realsense/calibration.py b/extensions/rcs_realsense/src/rcs_realsense/calibration.py index 3acce7ea..e6c6ede1 100644 --- a/extensions/rcs_realsense/src/rcs_realsense/calibration.py +++ b/extensions/rcs_realsense/src/rcs_realsense/calibration.py @@ -1,11 +1,13 @@ import logging +import threading import typing -from queue import Queue from time import sleep import apriltag import cv2 import numpy as np +from PIL import Image, ImageDraw +from rcs._core import common from rcs.camera.hw import CalibrationStrategy from rcs.camera.interface import Frame from tqdm import tqdm @@ -20,39 +22,37 @@ def __init__(self, camera_name: str): # base frame to camera, world to base frame self._extrinsics: np.ndarray[tuple[typing.Literal[4], typing.Literal[4]], np.dtype[np.float64]] | None = None self.camera_name = camera_name - self.world_to_tag = np.array( - [ - [0, -1, 0, 0.2], - [-1, 0, 0, 0], - [0, 0, -1, 0], - [0, 0, 0, 1], - ] - ) + self.tag_to_world = common.Pose(rpy_vector=[np.pi, 0, -np.pi / 2], translation=[0.2, 0, 0]).pose_matrix() def calibrate( self, - samples: Queue[Frame], + samples: list[Frame], intrinsics: np.ndarray[tuple[typing.Literal[3], typing.Literal[4]], np.dtype[np.float64]], + lock: threading.Lock, ) -> bool: logger.info("Calibrating camera %s. Position it as you wish and press enter.", self.camera_name) input() tries = 3 - while samples.qsize() < samples.maxsize - 1 and tries > 0: + while len(samples) < 10 and tries > 0: logger.info("not enought frames in recorded, waiting 2 seconds...") tries = -1 sleep(2) if tries == 0: logger.warning("Calibration failed, not enough frames arrived.") return False + frames = [] - for _ in samples.qsize(): - frames.append(samples.get()) + with lock: + for sample in samples: + frames.append(sample.camera.color.data.copy()) + print(frames) - _, cam_to_tag, _ = get_average_marker_pose( - frames, intrinsics=intrinsics, calib_tag_id=9, show_live_window=False + last_frame, tag_to_cam = get_average_marker_pose( + frames, intrinsics=intrinsics, calib_tag_id=9, show_live_window=True ) - world_to_cam = self.world_to_tag @ np.linalg.inv(cam_to_tag) + cam_to_world = self.tag_to_world @ np.linalg.inv(tag_to_cam) + world_to_cam = np.linalg.inv(cam_to_world) self._extrinsics = world_to_cam return True @@ -89,7 +89,7 @@ def get_average_marker_pose( last_frame = frame.copy() - camera_matrix = intrinsics + camera_matrix = intrinsics[:3, :3] if show_live_window: cv2.drawFrameAxes(frame, camera_matrix, None, pose[:3, :3], pose[:3, 3], 0.1) @@ -109,9 +109,8 @@ def get_average_marker_pose( logger.info(f"Average pose: {avg_pose}") # paint avg pose on last frame - if show_live_window: - cv2.drawFrameAxes(last_frame, camera_matrix, None, avg_pose[:3, :3], avg_pose[:3, 3], 0.1) # type: ignore - return last_frame, avg_pose, camera_matrix + # cv2.drawFrameAxes(last_frame, camera_matrix, None, avg_pose[:3, :3], avg_pose[:3, 3], 0.1) # type: ignore + return last_frame, avg_pose def get_marker_pose(calib_tag_id, detector, intrinsics, frame): diff --git a/extensions/rcs_realsense/src/rcs_realsense/camera.py b/extensions/rcs_realsense/src/rcs_realsense/camera.py index 411d20f1..188b325a 100644 --- a/extensions/rcs_realsense/src/rcs_realsense/camera.py +++ b/extensions/rcs_realsense/src/rcs_realsense/camera.py @@ -1,5 +1,6 @@ +import copy import logging -import queue +import threading import typing from dataclasses import dataclass @@ -59,7 +60,8 @@ def __init__( self.resolution_width = sample_camera_config.resolution_width self.resolution_height = sample_camera_config.resolution_height self.frame_rate = sample_camera_config.frame_rate - self._frame_buffer: dict[str, queue.Queue] = {} + self._frame_buffer_lock: dict[str, threading.Lock] = {} + self._frame_buffer: dict[str, list] = {} self.D400_config = rs.config() self.D400_config.enable_stream( @@ -181,17 +183,21 @@ def enable_device(self, camera_name: str, device_info: RealSenseDeviceInfo, enab color_vp = pipeline_profile.get_stream(rs.stream.color).as_video_stream_profile() rs_color_intrinsics = color_vp.get_intrinsics() - color_intrinsics = np.array([ - [rs_color_intrinsics.fx, 0, (rs_color_intrinsics.width-1)/2, 0], - [0, rs_color_intrinsics.fy, (rs_color_intrinsics.height-1)/2, 0], - [0, 0, 1, 0], - ]) + color_intrinsics = np.array( + [ + [rs_color_intrinsics.fx, 0, (rs_color_intrinsics.width - 1) / 2, 0], + [0, rs_color_intrinsics.fy, (rs_color_intrinsics.height - 1) / 2, 0], + [0, 0, 1, 0], + ] + ) rs_depth_intrinsics = depth_vp.get_intrinsics() - depth_intrinsics = np.array([ - [rs_depth_intrinsics.fx, 0, (rs_depth_intrinsics.width-1)/2, 0], - [0, rs_depth_intrinsics.fy, (rs_depth_intrinsics.height-1)/2, 0], - [0, 0, 1, 0], - ]) + depth_intrinsics = np.array( + [ + [rs_depth_intrinsics.fx, 0, (rs_depth_intrinsics.width - 1) / 2, 0], + [0, rs_depth_intrinsics.fy, (rs_depth_intrinsics.height - 1) / 2, 0], + [0, 0, 1, 0], + ] + ) depth_to_color = depth_vp.get_extrinsics_to(color_vp) @@ -202,10 +208,13 @@ def enable_device(self, camera_name: str, device_info: RealSenseDeviceInfo, enab depth_scale=sensor.get_depth_scale(), color_intrinsics=color_intrinsics, depth_intrinsics=depth_intrinsics, - depth_to_color=common.Pose(translation=depth_to_color.translation, rotation=np.array(depth_to_color.rotation).reshape(3, 3)), + depth_to_color=common.Pose( + translation=depth_to_color.translation, rotation=np.array(depth_to_color.rotation).reshape(3, 3) + ), ) - self._frame_buffer[camera_name] = queue.Queue(maxsize=self.CALIBRATION_FRAME_SIZE) + self._frame_buffer[camera_name] = [] + self._frame_buffer_lock[camera_name] = threading.Lock() self._logger.debug("Enabled device %s (%s)", device_info.serial, device_info.product_line) @staticmethod @@ -257,7 +266,9 @@ def to_ts(frame: rs.frame) -> float: color_extrinsics = self.calibration_strategy[camera_name].get_extrinsics() depth_to_color = device.depth_to_color - depth_extrinsics = color_extrinsics @ depth_to_color.inverse().pose_matrix() if color_extrinsics is not None else None + depth_extrinsics = ( + color_extrinsics @ depth_to_color.inverse().pose_matrix() if color_extrinsics is not None else None + ) timestamps = [] for stream in streams: @@ -275,7 +286,9 @@ def to_ts(frame: rs.frame) -> float: elif rs.stream.depth == stream.stream_type(): frame = frameset.get_depth_frame() depth = DataFrame( - data=(to_numpy(frame).astype(np.float64) * device.depth_scale / BaseCameraSet.DEPTH_SCALE).astype(np.int16), + data=(to_numpy(frame).astype(np.float64) * device.depth_scale / BaseCameraSet.DEPTH_SCALE).astype( + np.int16 + ), timestamp=to_ts(frame), intrinsics=device.depth_intrinsics, extrinsics=depth_extrinsics, @@ -302,7 +315,10 @@ def to_ts(frame: rs.frame) -> float: ) imu = IMUFrame(accel=accel, gyro=gyro) f = Frame(camera=cf, imu=imu, avg_timestamp=float(np.mean(timestamps)) if len(timestamps) > 0 else None) - self._frame_buffer[camera_name].put(f) + with self._frame_buffer_lock[camera_name]: + if len(self._frame_buffer[camera_name]) >= self.CALIBRATION_FRAME_SIZE: + self._frame_buffer[camera_name].pop(0) + self._frame_buffer[camera_name].append(copy.deepcopy(f)) return f def disable_streams(self): @@ -329,6 +345,12 @@ def enable_emitter(self, enable_ir_emitter=True): def calibrate(self) -> bool: for camera_name in self.cameras: - self.calibration_strategy[camera_name].calibrate( - intrinsics=self._enabled_devices[camera_name].color_intrinsics, samples=self._frame_buffer - ) + if not self.calibration_strategy[camera_name].calibrate( + intrinsics=self._enabled_devices[camera_name].color_intrinsics, + samples=self._frame_buffer[camera_name], + lock=self._frame_buffer_lock[camera_name], + ): + self._logger.warning(f"Calibration of camera {camera_name} failed.") + return False + self._logger.info("Calibration successful.") + return True diff --git a/python/rcs/camera/hw.py b/python/rcs/camera/hw.py index de017fc0..795fd104 100644 --- a/python/rcs/camera/hw.py +++ b/python/rcs/camera/hw.py @@ -50,10 +50,20 @@ class CalibrationStrategy(typing.Protocol): def calibrate( self, - samples: Queue[Frame], + samples: list[Frame], intrinsics: np.ndarray[tuple[typing.Literal[3], typing.Literal[4]], np.dtype[np.float64]], + lock: threading.Lock, ) -> bool: - """Implements algorithm to calibrate the camera.""" + """Implements algorithm to calibrate the camera. + + Args: + samples: List of frames to use for calibration. + intrinsics: Intrinsic camera parameters, e.g. from a previous calibration. + lock: A lock to ensure thread safety during calibration as the samples might refresh in parallel. + + Returns: + bool: True if calibration was successful, False otherwise. + """ def get_extrinsics(self) -> np.ndarray[tuple[typing.Literal[4], typing.Literal[4]], np.dtype[np.float64]] | None: """ @@ -67,8 +77,9 @@ class DummyCalibrationStrategy(CalibrationStrategy): def calibrate( self, - samples: Queue[Frame], + samples: list[Frame], intrinsics: np.ndarray[tuple[typing.Literal[3], typing.Literal[4]], np.dtype[np.float64]], + lock: threading.Lock, ) -> bool: return True @@ -272,5 +283,5 @@ def poll_frame(self, camera_name: str) -> Frame: def __enter__(self): pass - def __exit__(self, **kwargs): + def __exit__(self, *args, **kwargs): self.close() From cc7bbb055b76f63e9de9709ee41652265219a1a8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20J=C3=BClg?= Date: Fri, 11 Jul 2025 10:14:08 +0200 Subject: [PATCH 08/13] fix(cam): depth scaling corrected --- extensions/rcs_realsense/src/rcs_realsense/camera.py | 4 ++-- python/rcs/camera/sim.py | 12 +++++++----- python/rcs/envs/base.py | 5 +++-- 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/extensions/rcs_realsense/src/rcs_realsense/camera.py b/extensions/rcs_realsense/src/rcs_realsense/camera.py index 188b325a..2d422dca 100644 --- a/extensions/rcs_realsense/src/rcs_realsense/camera.py +++ b/extensions/rcs_realsense/src/rcs_realsense/camera.py @@ -286,8 +286,8 @@ def to_ts(frame: rs.frame) -> float: elif rs.stream.depth == stream.stream_type(): frame = frameset.get_depth_frame() depth = DataFrame( - data=(to_numpy(frame).astype(np.float64) * device.depth_scale / BaseCameraSet.DEPTH_SCALE).astype( - np.int16 + data=(to_numpy(frame).astype(np.float64) * device.depth_scale * BaseCameraSet.DEPTH_SCALE).astype( + np.uint16 ), timestamp=to_ts(frame), intrinsics=device.depth_intrinsics, diff --git a/python/rcs/camera/sim.py b/python/rcs/camera/sim.py index ac96bf92..6c2e2049 100644 --- a/python/rcs/camera/sim.py +++ b/python/rcs/camera/sim.py @@ -10,7 +10,7 @@ from rcs._core.sim import FrameSet as _FrameSet from rcs._core.sim import SimCameraConfig from rcs._core.sim import SimCameraSet as _SimCameraSet -from rcs.camera.interface import CameraFrame, DataFrame, Frame, FrameSet +from rcs.camera.interface import BaseCameraSet, CameraFrame, DataFrame, Frame, FrameSet from rcs import sim @@ -77,13 +77,15 @@ def _cpp_to_python_frames(self, cpp_frameset: _FrameSet | None) -> FrameSet | No data=color_np_frame, timestamp=cpp_frameset.timestamp, intrinsics=self._intrinsics(color_name), - extrinsics=self._extrinsics(color_frame), + extrinsics=self._extrinsics(color_name), ), depth=DataFrame( - data=depth_np_frame, + data= (depth_np_frame * BaseCameraSet.DEPTH_SCALE).astype( + np.uint16 + ), timestamp=cpp_frameset.timestamp, - intrinsics=self._intrinsics(depth_frame), - extrinsics=self._extrinsics(depth_frame), + intrinsics=self._intrinsics(depth_name), + extrinsics=self._extrinsics(depth_name), ), ) frame = Frame(camera=cameraframe, avg_timestamp=cpp_frameset.timestamp) diff --git a/python/rcs/envs/base.py b/python/rcs/envs/base.py index c6855a35..7d10c0ad 100644 --- a/python/rcs/envs/base.py +++ b/python/rcs/envs/base.py @@ -604,12 +604,13 @@ def __init__(self, env, camera_set: BaseCameraSet, include_depth: bool = False): params.update( { f"/{name}/{self.DEPTH_KEY}/frame": { + # values metric but scaled with factor rcs.BaseCameraSet.DEPTH_SCALE to fit into uint16 "height": camera_set.config(name).resolution_height, "width": camera_set.config(name).resolution_width, "color_dim": 1, "dtype": np.uint16, - "low": -np.inf, - "high": np.inf, + "low": 0, + "high": 65535, } for name in camera_set.camera_names } From e927df08da9b53524a4a482c62b274819c46b4bd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20J=C3=BClg?= Date: Wed, 16 Jul 2025 12:57:19 +0200 Subject: [PATCH 09/13] fix(calibration): april tag offset and not visible msg --- extensions/rcs_realsense/src/rcs_realsense/calibration.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/extensions/rcs_realsense/src/rcs_realsense/calibration.py b/extensions/rcs_realsense/src/rcs_realsense/calibration.py index e6c6ede1..f05f0a27 100644 --- a/extensions/rcs_realsense/src/rcs_realsense/calibration.py +++ b/extensions/rcs_realsense/src/rcs_realsense/calibration.py @@ -22,7 +22,7 @@ def __init__(self, camera_name: str): # base frame to camera, world to base frame self._extrinsics: np.ndarray[tuple[typing.Literal[4], typing.Literal[4]], np.dtype[np.float64]] | None = None self.camera_name = camera_name - self.tag_to_world = common.Pose(rpy_vector=[np.pi, 0, -np.pi / 2], translation=[0.2, 0, 0]).pose_matrix() + self.tag_to_world = common.Pose(rpy_vector=[np.pi, 0, -np.pi / 2], translation=[0.145, 0, 0]).pose_matrix() def calibrate( self, @@ -73,6 +73,7 @@ def get_average_marker_pose( # make while loop with tqdm poses = [] + last_frame = None for frame in tqdm(samples): # detect tags @@ -99,6 +100,9 @@ def get_average_marker_pose( # wait for key press if cv2.waitKey(1) & 0xFF == ord("q"): break + if last_frame is None: + msg = "No frames were processed, cannot calculate average pose. Check if the tag is visible." + raise ValueError(msg) if show_live_window: cv2.destroyAllWindows() From 5a76ebc02419061d06b154c692c6d9d0a72bc46e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20J=C3=BClg?= Date: Wed, 16 Jul 2025 20:43:16 +0200 Subject: [PATCH 10/13] fix(sim): camera render image mixup mjr_setBuffer was not called when context was switched which lead to wrong context usage and hence wrong images --- src/sim/renderer.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/src/sim/renderer.cpp b/src/sim/renderer.cpp index 95efeece..9e5e87a4 100644 --- a/src/sim/renderer.cpp +++ b/src/sim/renderer.cpp @@ -42,6 +42,7 @@ void Renderer::register_context(const std::string& id, size_t width, mjrContext* Renderer::get_context(const std::string& id) { common::ensure_current(); + mjr_setBuffer(mjFB_OFFSCREEN, this->ctxs[id]); return this->ctxs[id]; } } // namespace sim From 2acebd8473993c414c25f69f9a062eaf19bad52a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20J=C3=BClg?= Date: Fri, 18 Jul 2025 12:43:00 +0200 Subject: [PATCH 11/13] style: fix format --- extensions/rcs_realsense/src/rcs_realsense/calibration.py | 4 ++-- python/rcs/camera/hw.py | 1 - python/rcs/camera/sim.py | 4 +--- src/sim/camera.cpp | 8 ++++---- src/sim/sim.cpp | 2 +- 5 files changed, 8 insertions(+), 11 deletions(-) diff --git a/extensions/rcs_realsense/src/rcs_realsense/calibration.py b/extensions/rcs_realsense/src/rcs_realsense/calibration.py index f05f0a27..38cf020c 100644 --- a/extensions/rcs_realsense/src/rcs_realsense/calibration.py +++ b/extensions/rcs_realsense/src/rcs_realsense/calibration.py @@ -6,7 +6,6 @@ import apriltag import cv2 import numpy as np -from PIL import Image, ImageDraw from rcs._core import common from rcs.camera.hw import CalibrationStrategy from rcs.camera.interface import Frame @@ -131,7 +130,8 @@ def get_marker_pose(calib_tag_id, detector, intrinsics, frame): marker_det = det if n_det > 1: - raise ValueError("Expected 1 detection of tag id " f"{calib_tag_id}, got {n_det}.") + msg = f"Expected 1 detection of tag id {calib_tag_id}, got {n_det}." + raise ValueError(msg) if marker_det is None: return None, None diff --git a/python/rcs/camera/hw.py b/python/rcs/camera/hw.py index 795fd104..a04e5289 100644 --- a/python/rcs/camera/hw.py +++ b/python/rcs/camera/hw.py @@ -4,7 +4,6 @@ from collections.abc import Sequence from datetime import datetime from pathlib import Path -from queue import Queue from time import sleep import cv2 diff --git a/python/rcs/camera/sim.py b/python/rcs/camera/sim.py index 6c2e2049..fd79bd10 100644 --- a/python/rcs/camera/sim.py +++ b/python/rcs/camera/sim.py @@ -80,9 +80,7 @@ def _cpp_to_python_frames(self, cpp_frameset: _FrameSet | None) -> FrameSet | No extrinsics=self._extrinsics(color_name), ), depth=DataFrame( - data= (depth_np_frame * BaseCameraSet.DEPTH_SCALE).astype( - np.uint16 - ), + data=(depth_np_frame * BaseCameraSet.DEPTH_SCALE).astype(np.uint16), timestamp=cpp_frameset.timestamp, intrinsics=self._intrinsics(depth_name), extrinsics=self._extrinsics(depth_name), diff --git a/src/sim/camera.cpp b/src/sim/camera.cpp index e8fb659e..a70f8525 100644 --- a/src/sim/camera.cpp +++ b/src/sim/camera.cpp @@ -87,19 +87,19 @@ void SimCameraSet::render_all() { for (auto const& [id, cam] : this->cameras_cfg) { mjrContext* ctx = this->sim->renderer.get_context(id); this->render_single(id, *ctx, this->sim->renderer.scene, - this->sim->renderer.opt); + this->sim->renderer.opt); } } void SimCameraSet::frame_callback(const std::string& id, mjrContext& ctx, mjvScene& scene, mjvOption& opt) { - if (!this->render_on_demand){ - this->render_single(id, ctx, scene, opt); + if (!this->render_on_demand) { + this->render_single(id, ctx, scene, opt); } } void SimCameraSet::render_single(const std::string& id, mjrContext& ctx, - mjvScene& scene, mjvOption& opt) { + mjvScene& scene, mjvOption& opt) { mjrRect viewport = mjr_maxViewport(&ctx); int W = viewport.width; int H = viewport.height; diff --git a/src/sim/sim.cpp b/src/sim/sim.cpp index b33caefd..bb1d3163 100644 --- a/src/sim/sim.cpp +++ b/src/sim/sim.cpp @@ -26,7 +26,7 @@ bool get_last_return_value(ConditionCallback cb) { return cb.last_return_value; } -Sim::Sim(mjModel* m, mjData* d) : m(m), d(d), renderer(m){}; +Sim::Sim(mjModel* m, mjData* d) : m(m), d(d), renderer(m) {}; bool Sim::set_config(const Config& cfg) { if (cfg.async) { From 6c2d895689752c14213b7fb96453e30a1c6bfa02 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20J=C3=BClg?= Date: Fri, 18 Jul 2025 13:40:50 +0200 Subject: [PATCH 12/13] docs: cleaned up readme examples --- README.md | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/README.md b/README.md index 20122dcf..40d7d2f9 100644 --- a/README.md +++ b/README.md @@ -30,6 +30,7 @@ import rcs from rcs import sim from rcs._core.sim import CameraType from rcs.camera.sim import SimCameraConfig, SimCameraSet +from time import sleep simulation = sim.Sim(rcs.scenes["fr3_empty_world"]["mjb"]) urdf_path = rcs.scenes["fr3_empty_world"]["urdf"] ik = rcs.common.RL(str(urdf_path)) @@ -42,23 +43,18 @@ gripper_cfg_sim = sim.SimGripperConfig() gripper_cfg_sim.add_id("0") gripper = sim.SimGripper(simulation, gripper_cfg_sim) -# add camera to have a rendering gui -cameras = { - "wrist": SimCameraConfig( - identifier="wrist_0", - type=CameraType.fixed, - resolution_width=640, - resolution_height=480, - frame_rate=30, - ), -} -camera_set = SimCameraSet(simulation, cameras) +camera_set = SimCameraSet(simulation, {}) simulation.open_gui() +# wait for gui +sleep(5) +# step the robot 10 cm in x direction robot.set_cartesian_position( - robot.get_cartesian_position() * rcs.common.Pose(translation=np.array([0.05, 0, 0])) + robot.get_cartesian_position() * rcs.common.Pose(translation=np.array([0.1, 0, 0])) ) +# close gripper gripper.grasp() simulation.step_until_convergence() +input("press enter to close") ``` ### Gym Env Interface ```python @@ -80,7 +76,7 @@ env_rel = SimEnvCreator()( ) env_rel.get_wrapper_attr("sim").open_gui() -for _ in range(10): +for _ in range(100): obs, info = env_rel.reset() for _ in range(10): # sample random relative action and execute it @@ -110,8 +106,8 @@ For more details real the readme file of the respective extension. After the required hardware extensions are installed the examples also above work on real hardware: Switch to hardware by setting the following flag: ```python -ROBOT_INSTANCE = RobotPlatform.SIMULATION -# ROBOT_INSTANCE = RobotPlatform.HARDWARE +# ROBOT_INSTANCE = RobotPlatform.SIMULATION +ROBOT_INSTANCE = RobotPlatform.HARDWARE ``` #### Command Line Interface From 8cdae3f2fe189e9daa1c7d25a744302820a315a0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tobias=20J=C3=BClg?= Date: Fri, 18 Jul 2025 13:44:44 +0200 Subject: [PATCH 13/13] style: fix python linting --- .../src/rcs_realsense/calibration.py | 18 +++++++---------- .../rcs_realsense/src/rcs_realsense/camera.py | 20 +++++++------------ .../rcs_realsense/src/rcs_realsense/utils.py | 5 ++++- .../rcs_so101/src/rcs_so101/creators.py | 6 +++++- pyproject.toml | 1 + python/rcs/camera/digit_cam.py | 4 ++++ python/rcs/camera/interface.py | 2 +- python/rcs/camera/sim.py | 6 +++--- python/rcs/envs/base.py | 8 ++++---- python/rcs/envs/creators.py | 6 +++++- src/sim/sim.cpp | 2 +- 11 files changed, 42 insertions(+), 36 deletions(-) diff --git a/extensions/rcs_realsense/src/rcs_realsense/calibration.py b/extensions/rcs_realsense/src/rcs_realsense/calibration.py index 38cf020c..937450c5 100644 --- a/extensions/rcs_realsense/src/rcs_realsense/calibration.py +++ b/extensions/rcs_realsense/src/rcs_realsense/calibration.py @@ -21,7 +21,9 @@ def __init__(self, camera_name: str): # base frame to camera, world to base frame self._extrinsics: np.ndarray[tuple[typing.Literal[4], typing.Literal[4]], np.dtype[np.float64]] | None = None self.camera_name = camera_name - self.tag_to_world = common.Pose(rpy_vector=[np.pi, 0, -np.pi / 2], translation=[0.145, 0, 0]).pose_matrix() + self.tag_to_world = common.Pose( + rpy_vector=np.array([np.pi, 0, -np.pi / 2]), translation=np.array([0.145, 0, 0]) + ).pose_matrix() def calibrate( self, @@ -46,13 +48,11 @@ def calibrate( frames.append(sample.camera.color.data.copy()) print(frames) - last_frame, tag_to_cam = get_average_marker_pose( - frames, intrinsics=intrinsics, calib_tag_id=9, show_live_window=True - ) + _, tag_to_cam = get_average_marker_pose(frames, intrinsics=intrinsics, calib_tag_id=9, show_live_window=False) cam_to_world = self.tag_to_world @ np.linalg.inv(tag_to_cam) world_to_cam = np.linalg.inv(cam_to_world) - self._extrinsics = world_to_cam + self._extrinsics = world_to_cam # type: ignore return True def get_extrinsics(self) -> np.ndarray[tuple[typing.Literal[4], typing.Literal[4]], np.dtype[np.float64]] | None: @@ -82,8 +82,7 @@ def get_average_marker_pose( continue for corner in marker_det.corners: - corner = corner.astype(int) - cv2.circle(frame, tuple(corner), 5, (0, 0, 255), -1) + cv2.circle(frame, tuple(corner.astype(int)), 5, (0, 0, 255), -1) poses.append(pose) @@ -92,7 +91,7 @@ def get_average_marker_pose( camera_matrix = intrinsics[:3, :3] if show_live_window: - cv2.drawFrameAxes(frame, camera_matrix, None, pose[:3, :3], pose[:3, 3], 0.1) + cv2.drawFrameAxes(frame, camera_matrix, None, pose[:3, :3], pose[:3, 3], 0.1) # type: ignore # show frame cv2.imshow("frame", frame) @@ -107,12 +106,9 @@ def get_average_marker_pose( cv2.destroyAllWindows() # calculate the average marker pose - poses = np.array(poses) avg_pose = np.mean(poses, axis=0) logger.info(f"Average pose: {avg_pose}") - # paint avg pose on last frame - # cv2.drawFrameAxes(last_frame, camera_matrix, None, avg_pose[:3, :3], avg_pose[:3, 3], 0.1) # type: ignore return last_frame, avg_pose diff --git a/extensions/rcs_realsense/src/rcs_realsense/camera.py b/extensions/rcs_realsense/src/rcs_realsense/camera.py index 2d422dca..01d36e4c 100644 --- a/extensions/rcs_realsense/src/rcs_realsense/camera.py +++ b/extensions/rcs_realsense/src/rcs_realsense/camera.py @@ -48,7 +48,7 @@ def __init__( self.enable_imu = enable_imu self.cameras = cameras if calibration_strategy is None: - calibration_strategy = {camera_name: DummyCalibrationStrategy() for camera_name in cameras.keys()} + calibration_strategy = {camera_name: DummyCalibrationStrategy() for camera_name in cameras} self.calibration_strategy = calibration_strategy self._logger = logging.getLogger(__name__) assert ( @@ -123,16 +123,6 @@ def open(self): def update_available_devices(self): self._available_devices = self.enumerate_connected_devices(rs.context()) - def enable_all_devices(self, enable_ir_emitter: bool = False): - """ - Enable all the Intel RealSense Devices which are connected to the PC - - """ - self._logger.debug("%i devices have been found", len(self._available_devices)) - - for device_info in self._available_devices.values(): - self.enable_device(device_info, enable_ir_emitter) - def enable_devices(self, devices_to_enable: dict[str, str], enable_ir_emitter: bool = False): """ Enable the Intel RealSense Devices which are connected to the PC @@ -209,7 +199,7 @@ def enable_device(self, camera_name: str, device_info: RealSenseDeviceInfo, enab color_intrinsics=color_intrinsics, depth_intrinsics=depth_intrinsics, depth_to_color=common.Pose( - translation=depth_to_color.translation, rotation=np.array(depth_to_color.rotation).reshape(3, 3) + translation=depth_to_color.translation, rotation=np.array(depth_to_color.rotation).reshape(3, 3) # type: ignore ), ) @@ -266,6 +256,7 @@ def to_ts(frame: rs.frame) -> float: color_extrinsics = self.calibration_strategy[camera_name].get_extrinsics() depth_to_color = device.depth_to_color + assert depth_to_color is not None, "Depth to color extrinsics not found" depth_extrinsics = ( color_extrinsics @ depth_to_color.inverse().pose_matrix() if color_extrinsics is not None else None ) @@ -285,6 +276,7 @@ def to_ts(frame: rs.frame) -> float: ) elif rs.stream.depth == stream.stream_type(): frame = frameset.get_depth_frame() + assert device.depth_scale is not None, "Depth scale not found" depth = DataFrame( data=(to_numpy(frame).astype(np.float64) * device.depth_scale * BaseCameraSet.DEPTH_SCALE).astype( np.uint16 @@ -345,8 +337,10 @@ def enable_emitter(self, enable_ir_emitter=True): def calibrate(self) -> bool: for camera_name in self.cameras: + color_intrinsics = self._enabled_devices[camera_name].color_intrinsics + assert color_intrinsics is not None, f"Color intrinsics for camera {camera_name} not found" if not self.calibration_strategy[camera_name].calibrate( - intrinsics=self._enabled_devices[camera_name].color_intrinsics, + intrinsics=color_intrinsics, samples=self._frame_buffer[camera_name], lock=self._frame_buffer_lock[camera_name], ): diff --git a/extensions/rcs_realsense/src/rcs_realsense/utils.py b/extensions/rcs_realsense/src/rcs_realsense/utils.py index 8cb1efa4..b63842bb 100644 --- a/extensions/rcs_realsense/src/rcs_realsense/utils.py +++ b/extensions/rcs_realsense/src/rcs_realsense/utils.py @@ -1,3 +1,6 @@ +import typing + +from rcs.camera.hw import CalibrationStrategy from rcs_realsense.calibration import FR3BaseArucoCalibration from rcs_realsense.camera import RealSenseCameraSet @@ -11,5 +14,5 @@ def default_realsense(name2id: dict[str, str] | None) -> RealSenseCameraSet | No name: common.BaseCameraConfig(identifier=id, resolution_width=1280, resolution_height=720, frame_rate=30) for name, id in name2id.items() } - calibration_strategy = {name: FR3BaseArucoCalibration(name) for name in name2id} + calibration_strategy = {name: typing.cast(CalibrationStrategy, FR3BaseArucoCalibration(name)) for name in name2id} return RealSenseCameraSet(cameras=cameras, calibration_strategy=calibration_strategy) diff --git a/extensions/rcs_so101/src/rcs_so101/creators.py b/extensions/rcs_so101/src/rcs_so101/creators.py index acdf1584..98ae5404 100644 --- a/extensions/rcs_so101/src/rcs_so101/creators.py +++ b/extensions/rcs_so101/src/rcs_so101/creators.py @@ -1,4 +1,5 @@ import logging +import typing from os import PathLike from pathlib import Path from typing import Type @@ -15,6 +16,7 @@ from lerobot.common.teleoperators.so101_leader.so101_leader import SO101Leader from lerobot.common.teleoperators.utils import make_teleoperator_from_config from rcs.camera.hw import HardwareCameraSet +from rcs.camera.interface import BaseCameraSet from rcs.camera.sim import SimCameraSet from rcs.envs.base import ( CameraSetWrapper, @@ -125,7 +127,9 @@ def __call__( # type: ignore env = RobotSimWrapper(env, simulation, sim_wrapper) if cameras is not None: - camera_set = SimCameraSet(simulation, cameras, physical_units=True, render_on_demand=True) + camera_set = typing.cast( + BaseCameraSet, SimCameraSet(simulation, cameras, physical_units=True, render_on_demand=True) + ) env = CameraSetWrapper(env, camera_set, include_depth=True) if gripper_cfg is not None and isinstance(gripper_cfg, SimGripperConfig): diff --git a/pyproject.toml b/pyproject.toml index e9906cd3..7bd11928 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -101,6 +101,7 @@ ignore = [ "PT018", # assertion should be broken down into multiple parts "NPY002", "G004", # Logging format string does not contain any placeholders + "ARG002", ] [tool.pylint.format] diff --git a/python/rcs/camera/digit_cam.py b/python/rcs/camera/digit_cam.py index 3a1be493..bb7113fb 100644 --- a/python/rcs/camera/digit_cam.py +++ b/python/rcs/camera/digit_cam.py @@ -51,3 +51,7 @@ def close(self): def config(self, camera_name) -> BaseCameraConfig: return self.cameras[camera_name] + + def calibrate(self) -> bool: + """No calibration needed for DIGIT cameras.""" + return True diff --git a/python/rcs/camera/interface.py b/python/rcs/camera/interface.py index 5d5c75c2..711d16a5 100644 --- a/python/rcs/camera/interface.py +++ b/python/rcs/camera/interface.py @@ -50,7 +50,7 @@ class FrameSet: class BaseCameraSet(Protocol): """Interface for a set of cameras for sim and hardware""" - DEPTH_SCALE = 1000 + DEPTH_SCALE: int = 1000 def buffer_size(self) -> int: """Returns size of the internal buffer.""" diff --git a/python/rcs/camera/sim.py b/python/rcs/camera/sim.py index fd79bd10..1ec1ee94 100644 --- a/python/rcs/camera/sim.py +++ b/python/rcs/camera/sim.py @@ -90,7 +90,7 @@ def _cpp_to_python_frames(self, cpp_frameset: _FrameSet | None) -> FrameSet | No frames[color_name] = frame return FrameSet(frames=frames, avg_timestamp=cpp_frameset.timestamp) - def _intrinsics(self, camera_name) -> np.ndarray[tuple[Literal[3, 4]], np.dtype[np.float64]]: + def _intrinsics(self, camera_name) -> np.ndarray[tuple[Literal[3], Literal[4]], np.dtype[np.float64]]: cam_id = mujoco.mj_name2id(self._sim.model, mujoco.mjtObj.mjOBJ_CAMERA, self.cameras[camera_name].identifier) fovy = self._sim.model.cam_fovy[cam_id] fx = fy = 0.5 * self.cameras[camera_name].resolution_height / np.tan(fovy * np.pi / 360) @@ -102,14 +102,14 @@ def _intrinsics(self, camera_name) -> np.ndarray[tuple[Literal[3, 4]], np.dtype[ ] ) - def _extrinsics(self, camera_name) -> np.ndarray[tuple[Literal[4, 4]], np.dtype[np.float64]]: + def _extrinsics(self, camera_name) -> np.ndarray[tuple[Literal[4], Literal[4]], np.dtype[np.float64]]: cam_id = mujoco.mj_name2id(self._sim.model, mujoco.mjtObj.mjOBJ_CAMERA, self.cameras[camera_name].identifier) xpos = self._sim.data.cam_xpos[cam_id] xmat = self._sim.data.cam_xmat[cam_id].reshape(3, 3) cam = common.Pose(rotation=xmat, translation=xpos) # put z axis infront - rotation_p = common.Pose(rpy_vector=[np.pi, 0, 0], translation=[0, 0, 0]) + rotation_p = common.Pose(rpy_vector=np.array([np.pi, 0, 0]), translation=np.array([0, 0, 0])) cam = cam * rotation_p return cam.inverse().pose_matrix() diff --git a/python/rcs/envs/base.py b/python/rcs/envs/base.py index 7d10c0ad..4eee1be9 100644 --- a/python/rcs/envs/base.py +++ b/python/rcs/envs/base.py @@ -3,7 +3,7 @@ import copy import logging from enum import Enum, auto -from typing import Annotated, Any, TypeAlias, cast +from typing import Annotated, Any, Literal, TypeAlias, cast import gymnasium as gym import numpy as np @@ -131,7 +131,7 @@ class CameraDataDictType(RCSpaceType): "frame", ] intrinsics: Annotated[ - np.ndarray, + np.ndarray[tuple[Literal[3], Literal[4]], np.dtype[np.float64]] | None, gym.spaces.Box( low=-np.inf, high=np.inf, @@ -140,7 +140,7 @@ class CameraDataDictType(RCSpaceType): ), ] extrinsics: Annotated[ - np.ndarray, + np.ndarray[tuple[Literal[4], Literal[4]], np.dtype[np.float64]] | None, gym.spaces.Box( low=-np.inf, high=np.inf, @@ -646,7 +646,7 @@ def check_depth(depth): raise ValueError(msg) return self.include_depth - frame_dict: dict[str, dict[str, np.ndarray]] = { + frame_dict: dict[str, dict[str, CameraDataDictType]] = { camera_name: ( { self.RGB_KEY: CameraDataDictType( diff --git a/python/rcs/envs/creators.py b/python/rcs/envs/creators.py index b09eae75..be519b43 100644 --- a/python/rcs/envs/creators.py +++ b/python/rcs/envs/creators.py @@ -1,4 +1,5 @@ import logging +import typing from os import PathLike from typing import Type @@ -6,6 +7,7 @@ import numpy as np from gymnasium.envs.registration import EnvCreator from rcs._core.sim import CameraType +from rcs.camera.interface import BaseCameraSet from rcs.camera.sim import SimCameraConfig, SimCameraSet from rcs.envs.base import ( CameraSetWrapper, @@ -87,7 +89,9 @@ def __call__( # type: ignore env = RobotSimWrapper(env, simulation, sim_wrapper) if cameras is not None: - camera_set = SimCameraSet(simulation, cameras, physical_units=True, render_on_demand=True) + camera_set = typing.cast( + BaseCameraSet, SimCameraSet(simulation, cameras, physical_units=True, render_on_demand=True) + ) env = CameraSetWrapper(env, camera_set, include_depth=True) if gripper_cfg is not None and isinstance(gripper_cfg, rcs.sim.SimGripperConfig): diff --git a/src/sim/sim.cpp b/src/sim/sim.cpp index bb1d3163..b33caefd 100644 --- a/src/sim/sim.cpp +++ b/src/sim/sim.cpp @@ -26,7 +26,7 @@ bool get_last_return_value(ConditionCallback cb) { return cb.last_return_value; } -Sim::Sim(mjModel* m, mjData* d) : m(m), d(d), renderer(m) {}; +Sim::Sim(mjModel* m, mjData* d) : m(m), d(d), renderer(m){}; bool Sim::set_config(const Config& cfg) { if (cfg.async) {