diff --git a/dimos/robot/unitree_webrtc/keyboard_pose_teleop.py b/dimos/robot/unitree_webrtc/keyboard_pose_teleop.py new file mode 100644 index 0000000000..dcae2078fe --- /dev/null +++ b/dimos/robot/unitree_webrtc/keyboard_pose_teleop.py @@ -0,0 +1,240 @@ +#!/usr/bin/env python3 +# Copyright 2025-2026 Dimensional Inc. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from __future__ import annotations + +import math +import os +import threading + +import pygame + +from dimos.core import In, Module, Out, rpc +from dimos.msgs.geometry_msgs import PoseStamped, Quaternion, Vector3 +from dimos.msgs.std_msgs.Bool import Bool + +# Force X11 driver to avoid OpenGL threading issues (same as keyboard_teleop) +os.environ.setdefault("SDL_VIDEODRIVER", "x11") + + +STEP_FORWARD = 0.5 # meters per key press +STEP_LEFT = 0.5 # meters per key press +STEP_DEGREES = 15.0 # yaw change per key press + + +class KeyboardPoseTeleop(Module): + """Pygame-based keyboard control module for pose goals. + + This module maps discrete key presses to relative pose offsets and publishes + PoseStamped goals on the existing navigation goal topics. + + It is intended to complement the velocity-based KeyboardTeleop, by providing + a way to send small relative navigation goals using the same navigation stack. + """ + + # Navigation goal interface (wired via existing transports, e.g. 
/goal_pose, /cancel_goal)
+    goal_pose: Out[PoseStamped]
+    cancel_goal: Out[Bool]
+
+    # Current robot pose in a global frame, e.g. /odom
+    odom: In[PoseStamped]
+
+    _stop_event: threading.Event
+    _thread: threading.Thread | None = None
+    _screen: pygame.Surface | None = None
+    _clock: pygame.time.Clock | None = None
+    _font: pygame.font.Font | None = None
+    _current_pose: PoseStamped | None = None
+
+    def __init__(self) -> None:
+        super().__init__()
+        self._stop_event = threading.Event()
+
+    @rpc
+    def start(self) -> bool:
+        """Start the pygame UI thread and subscribe to odometry.
+
+        Returns True once the background loop has been launched.
+        """
+        super().start()
+
+        self._stop_event.clear()
+
+        # Subscribe to odometry pose so we can generate relative goals
+        if self.odom:
+            self.odom.subscribe(self._on_odom)
+
+        self._thread = threading.Thread(target=self._pygame_loop, daemon=True)
+        self._thread.start()
+
+        return True
+
+    @rpc
+    def stop(self) -> None:
+        """Stop the teleop loop and best-effort cancel any active goal.
+
+        Safe to call even if start() was never called or failed: the pygame
+        thread is only joined when it exists, so teardown always reaches
+        super().stop() instead of aborting half-way.
+        """
+        # Optionally cancel any active goal
+        if self.cancel_goal:
+            cancel_msg = Bool(data=True)
+            self.cancel_goal.publish(cancel_msg)
+
+        self._stop_event.set()
+
+        # Join only when the thread was actually started; raising here would
+        # skip super().stop() and leave the module half-stopped when stop()
+        # is called before start() (or after a failed start).
+        if self._thread is not None:
+            self._thread.join(2)
+            self._thread = None
+
+        super().stop()
+
+    def _on_odom(self, pose: PoseStamped) -> None:
+        """Callback to update the current pose estimate."""
+        self._current_pose = pose
+
+    def _pygame_loop(self) -> None:
+        """Run the pygame window and keyboard event loop until stopped."""
+        pygame.init()
+        self._screen = pygame.display.set_mode((520, 260), pygame.SWSURFACE)
+        pygame.display.set_caption("Keyboard Pose Teleop")
+        self._clock = pygame.time.Clock()
+        self._font = pygame.font.Font(None, 24)
+
+        while not self._stop_event.is_set():
+            for event in pygame.event.get():
+                if event.type == pygame.QUIT:
+                    self._stop_event.set()
+                elif event.type == pygame.KEYDOWN:
+                    if event.key == pygame.K_ESCAPE:
+                        # ESC quits
+                        self._stop_event.set()
+                    elif event.key == pygame.K_SPACE:
+                        # Cancel current navigation goal
+                        if self.cancel_goal:
+                            cancel_msg = Bool(data=True)
+                            self.cancel_goal.publish(cancel_msg)
+                            print("CANCEL GOAL")
+                    else:
+                        self._handle_motion_key(event.key)
+
+ self._update_display() + + if self._clock is None: + raise RuntimeError("_clock not initialized") + self._clock.tick(20) + + pygame.quit() + + def _handle_motion_key(self, key: int) -> None: + """Map a single key press to a relative move and publish a goal.""" + if self._current_pose is None: + # No pose estimate yet; cannot generate a sensible goal + print("No odom pose received yet; ignoring key press.") + return + + forward, left, degrees = 0.0, 0.0, 0.0 + + # Arrow keys for relative translation in the robot's local frame + if key == pygame.K_UP: + forward = STEP_FORWARD + elif key == pygame.K_DOWN: + forward = -STEP_FORWARD + elif key == pygame.K_LEFT: + left = STEP_LEFT + elif key == pygame.K_RIGHT: + left = -STEP_LEFT + + # Q/E for yaw rotation in place + elif key == pygame.K_q: + degrees = STEP_DEGREES + elif key == pygame.K_e: + degrees = -STEP_DEGREES + + else: + # Unmapped key + return + + goal = self._generate_new_goal(self._current_pose, forward, left, degrees) + + if self.goal_pose: + self.goal_pose.publish(goal) + print( + f"Published goal: forward={forward:.2f} m, left={left:.2f} m, " + f"yaw_delta={degrees:.1f} deg" + ) + + @staticmethod + def _generate_new_goal( + current_pose: PoseStamped, + forward: float, + left: float, + degrees: float, + ) -> PoseStamped: + """Generate a new PoseStamped goal using relative offsets. + + Logic mirrors the UnitreeSkillContainer.relative_move helper so that we + reuse the same navigation interface and semantics: + - (forward, left) are offsets in the robot's local frame. + - degrees is the desired change in yaw at the goal. 
+ """ + local_offset = Vector3(forward, left, 0.0) + # Rotate the local offset into the global frame using current orientation + global_offset = current_pose.orientation.rotate_vector(local_offset) + goal_position = current_pose.position + global_offset + + current_euler = current_pose.orientation.to_euler() + goal_yaw = current_euler.yaw + math.radians(degrees) + goal_euler = Vector3(current_euler.roll, current_euler.pitch, goal_yaw) + goal_orientation = Quaternion.from_euler(goal_euler) + + return PoseStamped( + position=goal_position, + orientation=goal_orientation, + frame_id=current_pose.frame_id, + ) + + def _update_display(self) -> None: + if self._screen is None or self._font is None: + raise RuntimeError("Not initialized correctly") + + self._screen.fill((30, 30, 30)) + + y_pos = 20 + + lines = [ + "Keyboard Pose Teleop", + "", + "Arrow keys: relative move (F/B/L/R)", + "Q/E: rotate left/right", + "Space: cancel goal", + "ESC: quit", + "", + ] + + if self._current_pose is not None: + pos = self._current_pose.position + yaw = self._current_pose.orientation.to_euler().yaw + lines.extend( + [ + f"Pose x={pos.x:.2f}, y={pos.y:.2f}, z={pos.z:.2f}", + f"Yaw={math.degrees(yaw):.1f} deg", + ] + ) + else: + lines.append("Waiting for odom pose...") + + for text in lines: + color = (0, 255, 255) if text.startswith("Keyboard Pose Teleop") else (255, 255, 255) + surf = self._font.render(text, True, color) + self._screen.blit(surf, (20, y_pos)) + y_pos += 28 + + pygame.display.flip() + + +keyboard_pose_teleop = KeyboardPoseTeleop.blueprint + +__all__ = ["KeyboardPoseTeleop", "keyboard_pose_teleop"] diff --git a/dimos/robot/unitree_webrtc/unitree_g1_blueprints.py b/dimos/robot/unitree_webrtc/unitree_g1_blueprints.py new file mode 100644 index 0000000000..b8670f4b6a --- /dev/null +++ b/dimos/robot/unitree_webrtc/unitree_g1_blueprints.py @@ -0,0 +1,270 @@ +#!/usr/bin/env python3 +# Copyright 2025-2026 Dimensional Inc. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Blueprint configurations for Unitree G1 humanoid robot. + +This module provides pre-configured blueprints for various G1 robot setups, +from basic teleoperation to full autonomous agent configurations. +""" + +from dimos_lcm.foxglove_msgs import SceneUpdate +from dimos_lcm.foxglove_msgs.ImageAnnotations import ( + ImageAnnotations, +) +from dimos_lcm.sensor_msgs import CameraInfo + +from dimos.agents.agent import llm_agent +from dimos.agents.cli.human import human_input +from dimos.agents.skills.navigation import navigation_skill +from dimos.constants import DEFAULT_CAPACITY_COLOR_IMAGE +from dimos.core.blueprints import autoconnect +from dimos.core.transport import LCMTransport, pSHMTransport +from dimos.hardware.sensors.camera import zed +from dimos.hardware.sensors.camera.module import camera_module # type: ignore[attr-defined] +from dimos.hardware.sensors.camera.webcam import Webcam +from dimos.mapping.costmapper import cost_mapper +from dimos.mapping.voxels import voxel_mapper +from dimos.msgs.geometry_msgs import ( + PoseStamped, + Quaternion, + Transform, + Twist, + Vector3, +) +from dimos.msgs.nav_msgs import Odometry, Path +from dimos.msgs.sensor_msgs import Image, PointCloud2 +from dimos.msgs.std_msgs import Bool +from dimos.msgs.vision_msgs import Detection2DArray +from dimos.navigation.frontier_exploration import wavefront_frontier_explorer +from dimos.navigation.replanning_a_star.module import 
replanning_a_star_planner
+from dimos.navigation.rosnav import ros_nav
+from dimos.perception.detection.detectors.person.yolo import YoloPersonDetector
+from dimos.perception.detection.module3D import Detection3DModule, detection3d_module
+from dimos.perception.detection.moduleDB import ObjectDBModule, detectionDB_module
+from dimos.perception.detection.person_tracker import PersonTracker, person_tracker_module
+from dimos.perception.object_tracker import object_tracking
+from dimos.perception.spatial_perception import spatial_memory
+from dimos.robot.foxglove_bridge import foxglove_bridge
+from dimos.robot.unitree.connection.g1 import g1_connection
+from dimos.robot.unitree.connection.g1sim import g1_sim_connection
+from dimos.robot.unitree.g1.skill_container import g1_skills
+# NOTE(review): dimos.robot.unitree.keyboard_teleop also exports keyboard_teleop;
+# importing both shadowed the first (F811), so only the unitree_webrtc variant
+# (the one that previously won) is kept. keyboard_pose_teleop is required by
+# the with_joystick blueprint below and was previously missing (NameError).
+from dimos.robot.unitree_webrtc.keyboard_pose_teleop import keyboard_pose_teleop
+from dimos.robot.unitree_webrtc.keyboard_teleop import keyboard_teleop
+from dimos.utils.monitoring import utilization
+from dimos.web.websocket_vis.websocket_vis_module import websocket_vis
+
+_basic_no_nav = (
+    autoconnect(
+        camera_module(
+            transform=Transform(
+                translation=Vector3(0.05, 0.0, 0.0),
+                rotation=Quaternion.from_euler(Vector3(0.0, 0.2, 0.0)),
+                frame_id="sensor",
+                child_frame_id="camera_link",
+            ),
+            hardware=lambda: Webcam(
+                camera_index=0,
+                fps=15,
+                stereo_slice="left",
+                camera_info=zed.CameraInfo.SingleWebcam,
+            ),
+        ),
+        voxel_mapper(voxel_size=0.1),
+        cost_mapper(),
+        wavefront_frontier_explorer(),
+        # Visualization
+        websocket_vis(),
+        foxglove_bridge(),
+    )
+    .global_config(n_dask_workers=4, robot_model="unitree_g1")
+    .transports(
+        {
+            # G1 uses Twist for movement commands
+            ("cmd_vel", Twist): LCMTransport("/cmd_vel", Twist),
+            # State estimation from ROS
+            ("state_estimation", Odometry): LCMTransport("/state_estimation", Odometry),
+            # Odometry output from ROSNavigationModule
+            ("odom", PoseStamped): LCMTransport("/odom", PoseStamped),
+            # Navigation module topics from nav_bot
+            ("goal_req",
PoseStamped): LCMTransport("/goal_req", PoseStamped), + ("goal_active", PoseStamped): LCMTransport("/goal_active", PoseStamped), + ("path_active", Path): LCMTransport("/path_active", Path), + ("pointcloud", PointCloud2): LCMTransport("/lidar", PointCloud2), + ("global_pointcloud", PointCloud2): LCMTransport("/map", PointCloud2), + # Original navigation topics for backwards compatibility + ("goal_pose", PoseStamped): LCMTransport("/goal_pose", PoseStamped), + ("goal_reached", Bool): LCMTransport("/goal_reached", Bool), + ("cancel_goal", Bool): LCMTransport("/cancel_goal", Bool), + # Camera topics (if camera module is added) + ("color_image", Image): LCMTransport("/g1/color_image", Image), + ("camera_info", CameraInfo): LCMTransport("/g1/camera_info", CameraInfo), + } + ) +) + +basic_ros = autoconnect( + _basic_no_nav, + g1_connection(), + ros_nav(), +) + +basic_sim = autoconnect( + _basic_no_nav, + g1_sim_connection(), + replanning_a_star_planner(), +) + +_perception_and_memory = autoconnect( + spatial_memory(), + object_tracking(frame_id="camera_link"), + utilization(), +) + +standard = autoconnect( + basic_ros, + _perception_and_memory, +).global_config(n_dask_workers=8) + +standard_sim = autoconnect( + basic_sim, + _perception_and_memory, +).global_config(n_dask_workers=8) + +# Optimized configuration using shared memory for images +standard_with_shm = autoconnect( + standard.transports( + { + ("color_image", Image): pSHMTransport( + "/g1/color_image", default_capacity=DEFAULT_CAPACITY_COLOR_IMAGE + ), + } + ), + foxglove_bridge( + shm_channels=[ + "/g1/color_image#sensor_msgs.Image", + ] + ), +) + +_agentic_skills = autoconnect( + llm_agent(), + human_input(), + navigation_skill(), + g1_skills(), +) + +# Full agentic configuration with LLM and skills +agentic = autoconnect( + standard, + _agentic_skills, +) + +agentic_sim = autoconnect( + standard_sim, + _agentic_skills, +) + +# Configuration with joystick control for teleoperation +with_joystick = autoconnect( 
+ basic_ros, + keyboard_teleop(), # Pygame-based velocity control (cmd_vel) + keyboard_pose_teleop(), # Pygame-based pose goal control (goal_pose/cancel_goal) +) + +# Detection configuration with person tracking and 3D detection +detection = ( + autoconnect( + basic_ros, + # Person detection modules with YOLO + detection3d_module( + camera_info=zed.CameraInfo.SingleWebcam, + detector=YoloPersonDetector, + ), + detectionDB_module( + camera_info=zed.CameraInfo.SingleWebcam, + filter=lambda det: det.class_id == 0, # Filter for person class only + ), + person_tracker_module( + cameraInfo=zed.CameraInfo.SingleWebcam, + ), + ) + .global_config(n_dask_workers=8) + .remappings( + [ + # Connect detection modules to camera and lidar + (Detection3DModule, "image", "color_image"), + (Detection3DModule, "pointcloud", "pointcloud"), + (ObjectDBModule, "image", "color_image"), + (ObjectDBModule, "pointcloud", "pointcloud"), + (PersonTracker, "image", "color_image"), + (PersonTracker, "detections", "detections_2d"), + ] + ) + .transports( + { + # Detection 3D module outputs + ("detections", Detection3DModule): LCMTransport( + "/detector3d/detections", Detection2DArray + ), + ("annotations", Detection3DModule): LCMTransport( + "/detector3d/annotations", ImageAnnotations + ), + ("scene_update", Detection3DModule): LCMTransport( + "/detector3d/scene_update", SceneUpdate + ), + ("detected_pointcloud_0", Detection3DModule): LCMTransport( + "/detector3d/pointcloud/0", PointCloud2 + ), + ("detected_pointcloud_1", Detection3DModule): LCMTransport( + "/detector3d/pointcloud/1", PointCloud2 + ), + ("detected_pointcloud_2", Detection3DModule): LCMTransport( + "/detector3d/pointcloud/2", PointCloud2 + ), + ("detected_image_0", Detection3DModule): LCMTransport("/detector3d/image/0", Image), + ("detected_image_1", Detection3DModule): LCMTransport("/detector3d/image/1", Image), + ("detected_image_2", Detection3DModule): LCMTransport("/detector3d/image/2", Image), + # Detection DB module outputs 
+ ("detections", ObjectDBModule): LCMTransport( + "/detectorDB/detections", Detection2DArray + ), + ("annotations", ObjectDBModule): LCMTransport( + "/detectorDB/annotations", ImageAnnotations + ), + ("scene_update", ObjectDBModule): LCMTransport("/detectorDB/scene_update", SceneUpdate), + ("detected_pointcloud_0", ObjectDBModule): LCMTransport( + "/detectorDB/pointcloud/0", PointCloud2 + ), + ("detected_pointcloud_1", ObjectDBModule): LCMTransport( + "/detectorDB/pointcloud/1", PointCloud2 + ), + ("detected_pointcloud_2", ObjectDBModule): LCMTransport( + "/detectorDB/pointcloud/2", PointCloud2 + ), + ("detected_image_0", ObjectDBModule): LCMTransport("/detectorDB/image/0", Image), + ("detected_image_1", ObjectDBModule): LCMTransport("/detectorDB/image/1", Image), + ("detected_image_2", ObjectDBModule): LCMTransport("/detectorDB/image/2", Image), + # Person tracker outputs + ("target", PersonTracker): LCMTransport("/person_tracker/target", PoseStamped), + } + ) +) + +# Full featured configuration with everything +full_featured = autoconnect( + standard_with_shm, + _agentic_skills, + keyboard_teleop(), +)