diff --git a/.gitignore b/.gitignore
index 24a3dd8919..f97d9f906a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -69,3 +69,5 @@ yolo11n.pt
CLAUDE.MD
/assets/teleop_certs/
+
+/.mcp.json
diff --git a/README.md b/README.md
index a84fe11b0e..35ae8546c1 100644
--- a/README.md
+++ b/README.md
@@ -107,7 +107,7 @@ Dimensional is agent native -- "vibecode" your robots in natural language and bu
π₯ Unitree B1
- π¨ Unitree G1
+ π¨ Unitree G1
|
π₯ Xarm
diff --git a/dimos/agents/agent.py b/dimos/agents/agent.py
index 76195ccea0..98f23d7e8d 100644
--- a/dimos/agents/agent.py
+++ b/dimos/agents/agent.py
@@ -46,9 +46,8 @@ class AgentConfig(ModuleConfig):
model_fixture: str | None = None
-class Agent(Module):
- default_config: type[AgentConfig] = AgentConfig
- config: AgentConfig
+class Agent(Module[AgentConfig]):
+ default_config = AgentConfig
agent: Out[BaseMessage]
human_input: In[str]
agent_idle: Out[bool]
diff --git a/dimos/agents/conftest.py b/dimos/agents/conftest.py
index 23d888b0fe..1be2aadc0c 100644
--- a/dimos/agents/conftest.py
+++ b/dimos/agents/conftest.py
@@ -31,11 +31,6 @@
FIXTURE_DIR = Path(__file__).parent / "fixtures"
-@pytest.fixture
-def fixture_dir() -> Path:
- return FIXTURE_DIR
-
-
@pytest.fixture
def agent_setup(request):
coordinator = None
diff --git a/dimos/agents/mcp/README.md b/dimos/agents/mcp/README.md
new file mode 100644
index 0000000000..f9e887beb1
--- /dev/null
+++ b/dimos/agents/mcp/README.md
@@ -0,0 +1,55 @@
+# DimOS MCP Server
+
+Expose DimOS robot skills to Claude Code via Model Context Protocol.
+
+## Setup
+
+```bash
+uv sync --extra base --extra unitree
+```
+
+Add to Claude Code (one command):
+
+```bash
+claude mcp add --transport http --scope project dimos http://localhost:9990/mcp
+```
+
+Verify that it was added:
+
+```bash
+claude mcp list
+```
+
+## MCP Inspector
+
+If you want to inspect the server manually, you can use MCP Inspector.
+
+Install it:
+
+```bash
+npx -y @modelcontextprotocol/inspector
+```
+
+It will open a browser window.
+
+Change **Transport Type** to "Streamable HTTP", change **URL** to `http://localhost:9990/mcp`, and **Connection Type** to "Direct". Then click on "Connect".
+
+## Usage
+
+**Terminal 1** - Start DimOS:
+```bash
+uv run dimos run unitree-go2-agentic-mcp
+```
+
+**Claude Code** - Use robot skills:
+```
+> move forward 1 meter
+> go to the kitchen
+> tag this location as "desk"
+```
+
+## How It Works
+
+1. `McpServer` in the blueprint starts a FastAPI server on port 9990
+2. Claude Code connects directly to `http://localhost:9990/mcp`
+3. Skills are exposed as MCP tools (e.g., `relative_move`, `navigate_with_text`)
diff --git a/dimos/environment/__init__.py b/dimos/agents/mcp/__init__.py
similarity index 100%
rename from dimos/environment/__init__.py
rename to dimos/agents/mcp/__init__.py
diff --git a/dimos/agents/mcp/conftest.py b/dimos/agents/mcp/conftest.py
new file mode 100644
index 0000000000..532ef16592
--- /dev/null
+++ b/dimos/agents/mcp/conftest.py
@@ -0,0 +1,103 @@
+# Copyright 2026 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+from pathlib import Path
+from threading import Event
+
+from dotenv import load_dotenv
+from langchain_core.messages.base import BaseMessage
+import pytest
+
+from dimos.agents.agent_test_runner import AgentTestRunner
+from dimos.agents.mcp.mcp_client import McpClient
+from dimos.agents.mcp.mcp_server import McpServer
+from dimos.core.blueprints import autoconnect
+from dimos.core.global_config import global_config
+from dimos.core.transport import pLCMTransport
+
+load_dotenv()
+
+FIXTURE_DIR = Path(__file__).parent / "fixtures"
+
+
+@pytest.fixture
+def agent_setup(request):
+ coordinator = None
+ transports: list[pLCMTransport] = []
+ unsubs: list = []
+ recording = bool(os.getenv("RECORD"))
+
+ def fn(
+ *,
+ blueprints,
+ messages: list[BaseMessage],
+ dask: bool = False,
+ system_prompt: str | None = None,
+ fixture: str | None = None,
+ ) -> list[BaseMessage]:
+ history: list[BaseMessage] = []
+ finished_event = Event()
+
+ agent_transport: pLCMTransport = pLCMTransport("/agent")
+ finished_transport: pLCMTransport = pLCMTransport("/finished")
+ transports.extend([agent_transport, finished_transport])
+
+ def on_message(msg: BaseMessage) -> None:
+ history.append(msg)
+
+ unsubs.append(agent_transport.subscribe(on_message))
+ unsubs.append(finished_transport.subscribe(lambda _: finished_event.set()))
+
+ # Derive fixture path from test name if not explicitly provided.
+ if fixture is not None:
+ fixture_path = FIXTURE_DIR / fixture
+ else:
+ fixture_path = FIXTURE_DIR / f"{request.node.name}.json"
+
+ client_kwargs: dict = {"system_prompt": system_prompt}
+
+ if recording or fixture_path.exists():
+ client_kwargs["model_fixture"] = str(fixture_path)
+
+ blueprint = autoconnect(
+ *blueprints,
+ McpServer.blueprint(),
+ McpClient.blueprint(**client_kwargs),
+ AgentTestRunner.blueprint(messages=messages),
+ )
+
+ global_config.update(
+ viewer_backend="none",
+ dask=dask,
+ )
+
+ nonlocal coordinator
+ coordinator = blueprint.build()
+
+ if not finished_event.wait(60):
+ raise TimeoutError("Timed out waiting for agent to finish processing messages.")
+
+ return history
+
+ yield fn
+
+ if coordinator is not None:
+ coordinator.stop()
+
+ for transport in transports:
+ transport.stop()
+
+ for unsub in unsubs:
+ unsub()
diff --git a/dimos/agents/mcp/fixtures/test_can_call_again_on_error[False].json b/dimos/agents/mcp/fixtures/test_can_call_again_on_error[False].json
new file mode 100644
index 0000000000..8cfe2f69c7
--- /dev/null
+++ b/dimos/agents/mcp/fixtures/test_can_call_again_on_error[False].json
@@ -0,0 +1,34 @@
+{
+ "responses": [
+ {
+ "content": "",
+ "tool_calls": [
+ {
+ "name": "register_user",
+ "args": {
+ "name": "Paul"
+ },
+ "id": "call_NrrizXSIFaeCLuG9i05IwDy3",
+ "type": "tool_call"
+ }
+ ]
+ },
+ {
+ "content": "",
+ "tool_calls": [
+ {
+ "name": "register_user",
+ "args": {
+ "name": "paul"
+ },
+ "id": "call_2QPx4GsL61Xjrggbq7afXTjn",
+ "type": "tool_call"
+ }
+ ]
+ },
+ {
+ "content": "The user named 'paul' has been registered successfully.",
+ "tool_calls": []
+ }
+ ]
+}
diff --git a/dimos/agents/mcp/fixtures/test_can_call_again_on_error[True].json b/dimos/agents/mcp/fixtures/test_can_call_again_on_error[True].json
new file mode 100644
index 0000000000..3d3765f43a
--- /dev/null
+++ b/dimos/agents/mcp/fixtures/test_can_call_again_on_error[True].json
@@ -0,0 +1,34 @@
+{
+ "responses": [
+ {
+ "content": "",
+ "tool_calls": [
+ {
+ "name": "register_user",
+ "args": {
+ "name": "Paul"
+ },
+ "id": "call_XSy1Dx1dGtQv5zPaEJtb2hd7",
+ "type": "tool_call"
+ }
+ ]
+ },
+ {
+ "content": "",
+ "tool_calls": [
+ {
+ "name": "register_user",
+ "args": {
+ "name": "paul"
+ },
+ "id": "call_aYFug1g3TATnaYus9HUVxoQS",
+ "type": "tool_call"
+ }
+ ]
+ },
+ {
+ "content": "The user named \"paul\" has been registered successfully.",
+ "tool_calls": []
+ }
+ ]
+}
diff --git a/dimos/agents/mcp/fixtures/test_can_call_tool[False].json b/dimos/agents/mcp/fixtures/test_can_call_tool[False].json
new file mode 100644
index 0000000000..7d1ac3075b
--- /dev/null
+++ b/dimos/agents/mcp/fixtures/test_can_call_tool[False].json
@@ -0,0 +1,22 @@
+{
+ "responses": [
+ {
+ "content": "",
+ "tool_calls": [
+ {
+ "name": "add",
+ "args": {
+ "x": 33333,
+ "y": 100
+ },
+ "id": "call_RssRDDd9apDjNoVLz4jRLVk0",
+ "type": "tool_call"
+ }
+ ]
+ },
+ {
+ "content": "The result of 33333 + 100 is 33433.",
+ "tool_calls": []
+ }
+ ]
+}
diff --git a/dimos/agents/mcp/fixtures/test_can_call_tool[True].json b/dimos/agents/mcp/fixtures/test_can_call_tool[True].json
new file mode 100644
index 0000000000..d375c82235
--- /dev/null
+++ b/dimos/agents/mcp/fixtures/test_can_call_tool[True].json
@@ -0,0 +1,22 @@
+{
+ "responses": [
+ {
+ "content": "",
+ "tool_calls": [
+ {
+ "name": "add",
+ "args": {
+ "x": 33333,
+ "y": 100
+ },
+ "id": "call_pzzddF9mBynGYZVdCmGHOB5V",
+ "type": "tool_call"
+ }
+ ]
+ },
+ {
+ "content": "The result of 33333 + 100 is 33433.",
+ "tool_calls": []
+ }
+ ]
+}
diff --git a/dimos/agents/mcp/fixtures/test_image.json b/dimos/agents/mcp/fixtures/test_image.json
new file mode 100644
index 0000000000..0e4816b8ee
--- /dev/null
+++ b/dimos/agents/mcp/fixtures/test_image.json
@@ -0,0 +1,23 @@
+{
+ "responses": [
+ {
+ "content": "",
+ "tool_calls": [
+ {
+ "name": "take_a_picture",
+ "args": {},
+ "id": "call_7Qwsr8QMLWhKRMektcGiKYf7",
+ "type": "tool_call"
+ }
+ ]
+ },
+ {
+ "content": "I've taken a picture. Let me analyze and describe it for you.\nThe image features an expansive outdoor stadium. From the camera's perspective, the word 'stadium' best matches the image. Is there anything else you'd like to know or do?",
+ "tool_calls": []
+ },
+ {
+ "content": "The image shows a group of people sitting and enjoying their time at an outdoor cafe. Therefore, the word 'cafe' best matches the image.",
+ "tool_calls": []
+ }
+ ]
+}
diff --git a/dimos/agents/mcp/fixtures/test_multiple_tool_calls_with_multiple_messages.json b/dimos/agents/mcp/fixtures/test_multiple_tool_calls_with_multiple_messages.json
new file mode 100644
index 0000000000..5c0d551e13
--- /dev/null
+++ b/dimos/agents/mcp/fixtures/test_multiple_tool_calls_with_multiple_messages.json
@@ -0,0 +1,116 @@
+{
+ "responses": [
+ {
+ "content": "",
+ "tool_calls": [
+ {
+ "name": "locate_person",
+ "args": {
+ "name": "John"
+ },
+ "id": "call_eOoKTtyvvXBk171ro4bXzW5C",
+ "type": "tool_call"
+ }
+ ]
+ },
+ {
+ "content": "",
+ "tool_calls": [
+ {
+ "name": "register_person",
+ "args": {
+ "name": "John"
+ },
+ "id": "call_tTB5A3q60teaBrdonRvCwcwM",
+ "type": "tool_call"
+ }
+ ]
+ },
+ {
+ "content": "",
+ "tool_calls": [
+ {
+ "name": "locate_person",
+ "args": {
+ "name": "John"
+ },
+ "id": "call_uEhafkL3f7BLQKhRuZlEAany",
+ "type": "tool_call"
+ }
+ ]
+ },
+ {
+ "content": "",
+ "tool_calls": [
+ {
+ "name": "go_to_location",
+ "args": {
+ "description": "kitchen"
+ },
+ "id": "call_oxnH4gCGi6aSeVLPrhnp31yP",
+ "type": "tool_call"
+ }
+ ]
+ },
+ {
+ "content": "I have moved to the kitchen where John is located.",
+ "tool_calls": []
+ },
+ {
+ "content": "",
+ "tool_calls": [
+ {
+ "name": "locate_person",
+ "args": {
+ "name": "Jane"
+ },
+ "id": "call_2HinxBmffnafloaP4b7DkBZW",
+ "type": "tool_call"
+ }
+ ]
+ },
+ {
+ "content": "",
+ "tool_calls": [
+ {
+ "name": "register_person",
+ "args": {
+ "name": "Jane"
+ },
+ "id": "call_XtHavMmgpzrhmVi3XB6RUFrW",
+ "type": "tool_call"
+ }
+ ]
+ },
+ {
+ "content": "",
+ "tool_calls": [
+ {
+ "name": "locate_person",
+ "args": {
+ "name": "Jane"
+ },
+ "id": "call_fRHHO4cPWDXi4IvQ4qQqidwT",
+ "type": "tool_call"
+ }
+ ]
+ },
+ {
+ "content": "",
+ "tool_calls": [
+ {
+ "name": "go_to_location",
+ "args": {
+ "description": "living room"
+ },
+ "id": "call_Hcc7C0FMWS8rfKwMP0sUL7XN",
+ "type": "tool_call"
+ }
+ ]
+ },
+ {
+ "content": "I have moved to the living room where Jane is located.",
+ "tool_calls": []
+ }
+ ]
+}
diff --git a/dimos/agents/mcp/fixtures/test_prompt.json b/dimos/agents/mcp/fixtures/test_prompt.json
new file mode 100644
index 0000000000..acb77fe350
--- /dev/null
+++ b/dimos/agents/mcp/fixtures/test_prompt.json
@@ -0,0 +1,8 @@
+{
+ "responses": [
+ {
+ "content": "Hello! My name is Johnny. How can I assist you today?",
+ "tool_calls": []
+ }
+ ]
+}
diff --git a/dimos/agents/mcp/mcp_client.py b/dimos/agents/mcp/mcp_client.py
new file mode 100644
index 0000000000..7c5eda5302
--- /dev/null
+++ b/dimos/agents/mcp/mcp_client.py
@@ -0,0 +1,250 @@
+# Copyright 2025-2026 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from dataclasses import dataclass
+from queue import Empty, Queue
+from threading import Event, RLock, Thread
+import time
+from typing import Any
+import uuid
+
+import httpx
+from langchain.agents import create_agent
+from langchain_core.messages import HumanMessage
+from langchain_core.messages.base import BaseMessage
+from langchain_core.tools import StructuredTool
+from langgraph.graph.state import CompiledStateGraph
+from reactivex.disposable import Disposable
+
+from dimos.agents.system_prompt import SYSTEM_PROMPT
+from dimos.agents.utils import pretty_print_langchain_message
+from dimos.core.core import rpc
+from dimos.core.module import Module, ModuleConfig
+from dimos.core.rpc_client import RPCClient
+from dimos.core.stream import In, Out
+from dimos.utils.logging_config import setup_logger
+from dimos.utils.sequential_ids import SequentialIds
+
+logger = setup_logger()
+
+
+@dataclass
+class McpClientConfig(ModuleConfig):
+ system_prompt: str | None = SYSTEM_PROMPT
+ model: str = "gpt-4o"
+ model_fixture: str | None = None
+ mcp_server_url: str = "http://localhost:9990/mcp"
+
+
+class McpClient(Module[McpClientConfig]):
+ default_config = McpClientConfig
+ agent: Out[BaseMessage]
+ human_input: In[str]
+ agent_idle: Out[bool]
+
+ _lock: RLock
+ _state_graph: CompiledStateGraph[Any, Any, Any, Any] | None
+ _message_queue: Queue[BaseMessage]
+ _history: list[BaseMessage]
+ _thread: Thread
+ _stop_event: Event
+ _http_client: httpx.Client
+ _seq_ids: SequentialIds
+
+ def __init__(self, *args: Any, **kwargs: Any) -> None:
+ super().__init__(*args, **kwargs)
+ self._lock = RLock()
+ self._state_graph = None
+ self._message_queue = Queue()
+ self._history = []
+ self._thread = Thread(
+ target=self._thread_loop,
+ name=f"{self.__class__.__name__}-thread",
+ daemon=True,
+ )
+ self._stop_event = Event()
+ self._http_client = httpx.Client(timeout=120.0)
+ self._seq_ids = SequentialIds()
+
+ def __reduce__(self) -> Any:
+ return (self.__class__, (), {})
+
+ def _mcp_request(self, method: str, params: dict[str, Any] | None = None) -> dict[str, Any]:
+ body: dict[str, Any] = {
+ "jsonrpc": "2.0",
+ "id": self._seq_ids.next(),
+ "method": method,
+ }
+ if params is not None:
+ body["params"] = params
+
+ resp = self._http_client.post(self.config.mcp_server_url, json=body)
+ resp.raise_for_status()
+ data = resp.json()
+
+ if "error" in data:
+ raise RuntimeError(f"MCP error {data['error']['code']}: {data['error']['message']}")
+
+ result: dict[str, Any] = data.get("result")
+ return result
+
+ def _fetch_tools(self, timeout: float = 60.0, interval: float = 1.0) -> list[StructuredTool]:
+ result = self._try_fetch_tools(timeout=timeout, interval=interval)
+ if result is None:
+ raise RuntimeError(
+ f"Failed to fetch tools from MCP server {self.config.mcp_server_url}"
+ )
+
+ tools = [self._mcp_tool_to_langchain(t) for t in result.get("tools", [])]
+
+ if not tools:
+ logger.warning("No tools found from MCP server.")
+ else:
+ tool_names = [t.name for t in tools]
+ logger.info("Discovered tools from MCP server.", tools=tool_names, n_tools=len(tools))
+
+ return tools
+
+ def _try_fetch_tools(self, timeout: float, interval: float) -> dict[str, Any] | None:
+ deadline = time.monotonic() + timeout
+
+ while True:
+ try:
+ self._mcp_request("initialize")
+ break
+ except (httpx.ConnectError, httpx.RemoteProtocolError):
+ if time.monotonic() >= deadline:
+ return None
+ time.sleep(interval)
+
+ return self._mcp_request("tools/list")
+
+ def _mcp_tool_to_langchain(self, mcp_tool: dict[str, Any]) -> StructuredTool:
+ name = mcp_tool["name"]
+ description = mcp_tool.get("description", "")
+ input_schema = mcp_tool.get("inputSchema", {"type": "object", "properties": {}})
+
+ def call_tool(**kwargs: Any) -> str:
+ result = self._mcp_request("tools/call", {"name": name, "arguments": kwargs})
+ content = result.get("content", [])
+ parts = [c.get("text", "") for c in content if c.get("type") == "text"]
+ text = "\n".join(parts)
+
+ # Images need to be added to the history separately because they
+ # cannot be included in the tool response for OpenAI models and
+ # probably others.
+ for item in content:
+ if item.get("type") != "text":
+ uuid_ = str(uuid.uuid4())
+ text += f"Tool call started with UUID: {uuid_}. You will be updated with the result soon."
+ _append_image_to_history(self, name, uuid_, item)
+
+ return text
+
+ return StructuredTool(
+ name=name,
+ description=description,
+ func=call_tool,
+ args_schema=input_schema,
+ )
+
+ @rpc
+ def start(self) -> None:
+ super().start()
+
+ def _on_human_input(string: str) -> None:
+ self._message_queue.put(HumanMessage(content=string))
+
+ self._disposables.add(Disposable(self.human_input.subscribe(_on_human_input)))
+
+ @rpc
+ def on_system_modules(self, _modules: list[RPCClient]) -> None:
+ tools = self._fetch_tools()
+
+ model: str | Any = self.config.model
+ if self.config.model_fixture is not None:
+ from dimos.agents.testing import MockModel
+
+ model = MockModel(json_path=self.config.model_fixture)
+
+ with self._lock:
+ self._state_graph = create_agent(
+ model=model,
+ tools=tools,
+ system_prompt=self.config.system_prompt,
+ )
+ self._thread.start()
+
+ @rpc
+ def stop(self) -> None:
+ self._stop_event.set()
+ if self._thread.is_alive():
+ self._thread.join(timeout=2.0)
+ self._http_client.close()
+ super().stop()
+
+ @rpc
+ def add_message(self, message: BaseMessage) -> None:
+ self._message_queue.put(message)
+
+ def _thread_loop(self) -> None:
+ while not self._stop_event.is_set():
+ try:
+ message = self._message_queue.get(timeout=0.5)
+ except Empty:
+ continue
+
+ with self._lock:
+ if not self._state_graph:
+ raise ValueError("No state graph initialized")
+ self._process_message(self._state_graph, message)
+
+ def _process_message(
+ self, state_graph: CompiledStateGraph[Any, Any, Any, Any], message: BaseMessage
+ ) -> None:
+ self.agent_idle.publish(False)
+ self._history.append(message)
+ pretty_print_langchain_message(message)
+ self.agent.publish(message)
+
+ for update in state_graph.stream({"messages": self._history}, stream_mode="updates"):
+ for node_output in update.values():
+ for msg in node_output.get("messages", []):
+ self._history.append(msg)
+ pretty_print_langchain_message(msg)
+ self.agent.publish(msg)
+
+ if self._message_queue.empty():
+ self.agent_idle.publish(True)
+
+
+def _append_image_to_history(
+ mcp_client: McpClient, func_name: str, uuid_: str, result: Any
+) -> None:
+ mcp_client.add_message(
+ HumanMessage(
+ content=[
+ {
+ "type": "text",
+ "text": f"This is the artefact for the '{func_name}' tool with UUID:={uuid_}.",
+ },
+ result,
+ ]
+ )
+ )
+
+
+mcp_client = McpClient.blueprint
+
+__all__ = ["McpClient", "McpClientConfig", "mcp_client"]
diff --git a/dimos/agents/mcp/mcp_server.py b/dimos/agents/mcp/mcp_server.py
new file mode 100644
index 0000000000..1f8ce92888
--- /dev/null
+++ b/dimos/agents/mcp/mcp_server.py
@@ -0,0 +1,197 @@
+# Copyright 2025-2026 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import annotations
+
+import asyncio
+import json
+from typing import TYPE_CHECKING, Any
+
+from fastapi import FastAPI
+from fastapi.middleware.cors import CORSMiddleware
+from fastapi.responses import JSONResponse
+from starlette.responses import Response
+import uvicorn
+
+from dimos.utils.logging_config import setup_logger
+
+logger = setup_logger()
+
+
+from dimos.core import Module, rpc # noqa: I001
+from dimos.core.rpc_client import RpcCall, RPCClient
+
+from starlette.requests import Request # noqa: TC002
+
+if TYPE_CHECKING:
+ import concurrent.futures
+
+ from dimos.core.module import SkillInfo
+
+
+app = FastAPI()
+app.add_middleware(
+ CORSMiddleware,
+ allow_origins=["*"],
+ allow_methods=["POST"],
+ allow_headers=["*"],
+)
+app.state.skills = []
+app.state.rpc_calls = {}
+
+
+def _jsonrpc_result(req_id: Any, result: Any) -> dict[str, Any]:
+ return {"jsonrpc": "2.0", "id": req_id, "result": result}
+
+
+def _jsonrpc_result_text(req_id: Any, text: str) -> dict[str, Any]:
+ return _jsonrpc_result(req_id, {"content": [{"type": "text", "text": text}]})
+
+
+def _jsonrpc_error(req_id: Any, code: int, message: str) -> dict[str, Any]:
+ return {"jsonrpc": "2.0", "id": req_id, "error": {"code": code, "message": message}}
+
+
+def _handle_initialize(req_id: Any) -> dict[str, Any]:
+ return _jsonrpc_result(
+ req_id,
+ {
+ "protocolVersion": "2025-11-25",
+ "capabilities": {"tools": {}},
+ "serverInfo": {"name": "dimensional", "version": "1.0.0"},
+ },
+ )
+
+
+def _handle_tools_list(req_id: Any, skills: list[SkillInfo]) -> dict[str, Any]:
+ tools = []
+
+ for skill in skills:
+ schema = json.loads(skill.args_schema)
+ description = schema.pop("description", None)
+ schema.pop("title", None)
+ tool = {"name": skill.func_name, "inputSchema": schema}
+ if description:
+ tool["description"] = description
+ tools.append(tool)
+
+ return _jsonrpc_result(req_id, {"tools": tools})
+
+
+async def _handle_tools_call(
+ req_id: Any, params: dict[str, Any], rpc_calls: dict[str, Any]
+) -> dict[str, Any]:
+ name = params.get("name", "")
+ args: dict[str, Any] = params.get("arguments") or {}
+
+ rpc_call = rpc_calls.get(name)
+ if rpc_call is None:
+ return _jsonrpc_result_text(req_id, f"Tool not found: {name}")
+
+ try:
+ result = await asyncio.get_event_loop().run_in_executor(None, lambda: rpc_call(**args))
+ except Exception as e:
+ logger.exception("Error running tool", tool_name=name, exc_info=True)
+ return _jsonrpc_result_text(req_id, f"Error running tool '{name}': {e}")
+
+ if result is None:
+ return _jsonrpc_result_text(req_id, "It has started. You will be updated later.")
+
+ if hasattr(result, "agent_encode"):
+ return _jsonrpc_result(req_id, {"content": result.agent_encode()})
+
+ return _jsonrpc_result_text(req_id, str(result))
+
+
+async def handle_request(
+ request: dict[str, Any],
+ skills: list[SkillInfo],
+ rpc_calls: dict[str, Any],
+) -> dict[str, Any] | None:
+ """Handle a single MCP JSON-RPC request.
+
+ Returns None for JSON-RPC notifications (no ``id``), which must not
+ receive a response.
+ """
+ method = request.get("method", "")
+ params = request.get("params", {}) or {}
+ req_id = request.get("id")
+
+ # JSON-RPC notifications have no "id" β the server must not reply.
+ if "id" not in request:
+ return None
+
+ if method == "initialize":
+ return _handle_initialize(req_id)
+ if method == "tools/list":
+ return _handle_tools_list(req_id, skills)
+ if method == "tools/call":
+ return await _handle_tools_call(req_id, params, rpc_calls)
+ return _jsonrpc_error(req_id, -32601, f"Unknown: {method}")
+
+
+@app.post("/mcp")
+async def mcp_endpoint(request: Request) -> Response:
+ raw = await request.body()
+ try:
+ body = json.loads(raw)
+ except Exception:
+ logger.exception("POST /mcp JSON parse failed")
+ return JSONResponse(
+ {"jsonrpc": "2.0", "id": None, "error": {"code": -32700, "message": "Parse error"}},
+ status_code=400,
+ )
+ result = await handle_request(body, request.app.state.skills, request.app.state.rpc_calls)
+ if result is None:
+ return Response(status_code=204)
+ return JSONResponse(result)
+
+
+class McpServer(Module):
+ def __init__(self) -> None:
+ super().__init__()
+ self._uvicorn_server: uvicorn.Server | None = None
+ self._serve_future: concurrent.futures.Future[None] | None = None
+
+ @rpc
+ def start(self) -> None:
+ super().start()
+ self._start_server()
+
+ @rpc
+ def stop(self) -> None:
+ if self._uvicorn_server:
+ self._uvicorn_server.should_exit = True
+ loop = self._loop
+ if loop is not None and self._serve_future is not None:
+ self._serve_future.result(timeout=5.0)
+ self._uvicorn_server = None
+ self._serve_future = None
+ super().stop()
+
+ @rpc
+ def on_system_modules(self, modules: list[RPCClient]) -> None:
+ assert self.rpc is not None
+ app.state.skills = [skill for module in modules for skill in (module.get_skills() or [])]
+ app.state.rpc_calls = {
+ skill.func_name: RpcCall(None, self.rpc, skill.func_name, skill.class_name, [])
+ for skill in app.state.skills
+ }
+
+ def _start_server(self, port: int = 9990) -> None:
+ config = uvicorn.Config(app, host="0.0.0.0", port=port, log_level="info")
+ server = uvicorn.Server(config)
+ self._uvicorn_server = server
+ loop = self._loop
+ assert loop is not None
+ self._serve_future = asyncio.run_coroutine_threadsafe(server.serve(), loop)
diff --git a/dimos/agents/mcp/test_mcp_client.py b/dimos/agents/mcp/test_mcp_client.py
new file mode 100644
index 0000000000..be4a09d5b9
--- /dev/null
+++ b/dimos/agents/mcp/test_mcp_client.py
@@ -0,0 +1,210 @@
+# Copyright 2026 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from langchain_core.messages import HumanMessage
+import pytest
+
+from dimos.agents.annotation import skill
+from dimos.core.module import Module
+from dimos.msgs.sensor_msgs import Image
+from dimos.utils.data import get_data
+
+
+class Adder(Module):
+ @skill
+ def add(self, x: int, y: int) -> str:
+ """adds x and y."""
+ return str(x + y)
+
+
+@pytest.mark.integration
+@pytest.mark.parametrize("dask", [False, True])
+def test_can_call_tool(dask, agent_setup):
+ history = agent_setup(
+ blueprints=[Adder.blueprint()],
+ messages=[HumanMessage("What is 33333 + 100? Use the tool.")],
+ dask=dask,
+ )
+
+ assert "33433" in history[-1].content
+
+
+class UserRegistration(Module):
+ def __init__(self):
+ super().__init__()
+ self._first_call = True
+ self._use_upper = False
+
+ @skill
+ def register_user(self, name: str) -> str:
+ """registers a user by name."""
+
+ # If the agent calls with "paul" or "Paul", always say it's the wrong way
+ # to force it to try again.
+
+ if self._first_call:
+ self._first_call = False
+ self._use_upper = not name[0].isupper()
+
+ if self._use_upper and not name[0].isupper():
+ raise ValueError("Names must start with an uppercase letter.")
+ if not self._use_upper and name[0].isupper():
+ raise ValueError("The names must only use lowercase letters.")
+
+ return "User name registered successfully."
+
+
+@pytest.mark.integration
+@pytest.mark.parametrize("dask", [False, True])
+def test_can_call_again_on_error(dask, agent_setup):
+ history = agent_setup(
+ blueprints=[UserRegistration.blueprint()],
+ messages=[
+ HumanMessage(
+ "Register a user named 'Paul'. If there are errors, just try again until you succeed."
+ )
+ ],
+ dask=dask,
+ )
+
+ assert any(message.content == "User name registered successfully." for message in history)
+
+
+class MultipleTools(Module):
+ def __init__(self):
+ super().__init__()
+ self._people = {"Ben": "office", "Bob": "garage"}
+
+ @skill
+ def register_person(self, name: str) -> str:
+ """Registers a person by name."""
+ if name.lower() == "john":
+ self._people[name] = "kitchen"
+ elif name.lower() == "jane":
+ self._people[name] = "living room"
+ return f"'{name}' has been registered."
+
+ @skill
+ def locate_person(self, name: str) -> str:
+ """Locates a person by name."""
+ if name not in self._people:
+ known_people = list(self._people.keys())
+ return (
+ f"Error: '{name}' is not registered. People cannot be located until they've "
+ f"been registered in the system. People known so far: {', '.join(known_people)}. "
+ "Use register_person to register a person."
+ )
+ return f"'{name}' is located at '{self._people[name]}'."
+
+
+class NavigationSkill(Module):
+ @skill
+ def go_to_location(self, description: str) -> str:
+ """Go to a location by a description."""
+ if description.strip().lower() not in ["kitchen", "living room"]:
+ return f"Error: Unknown location description: '{description}'."
+ return f"Going to the {description}."
+
+
+@pytest.mark.integration
+def test_multiple_tool_calls_with_multiple_messages(agent_setup):
+ history = agent_setup(
+ blueprints=[MultipleTools.blueprint(), NavigationSkill.blueprint()],
+ messages=[
+ HumanMessage(
+ "You are a robot assistant. Move to the location where John is. Don't ask me for feedback, just go there."
+ ),
+ HumanMessage("Nice job. You did it. Now go to the location where Jane is."),
+ ],
+ )
+
+ # Collect all go_to_location calls from the history
+ go_to_location_calls = []
+ for message in history:
+ if hasattr(message, "tool_calls"):
+ for tool_call in message.tool_calls:
+ if tool_call["name"] == "go_to_location":
+ go_to_location_calls.append(tool_call)
+
+ # Find the index of the second HumanMessage to split first/second prompt
+ second_human_idx = None
+ human_count = 0
+ for i, message in enumerate(history):
+ if isinstance(message, HumanMessage):
+ human_count += 1
+ if human_count == 2:
+ second_human_idx = i
+ break
+
+ # Collect go_to_location calls before and after the second prompt
+ calls_after_first_prompt = []
+ calls_after_second_prompt = []
+ for i, message in enumerate(history):
+ if hasattr(message, "tool_calls"):
+ for tool_call in message.tool_calls:
+ if tool_call["name"] == "go_to_location":
+ if i < second_human_idx:
+ calls_after_first_prompt.append(tool_call)
+ else:
+ calls_after_second_prompt.append(tool_call)
+
+ # After the first prompt, go_to_location should be called with "kitchen"
+ assert len(calls_after_first_prompt) == 1
+ assert "kitchen" in calls_after_first_prompt[0]["args"]["description"].lower()
+
+ # After the second prompt, go_to_location should be called with "living room"
+ assert len(calls_after_second_prompt) == 1
+ assert "living room" in calls_after_second_prompt[0]["args"]["description"].lower()
+
+ # There should be exactly two go_to_location calls total
+ assert len(go_to_location_calls) == 2
+
+
+@pytest.mark.integration
+def test_prompt(agent_setup):
+ history = agent_setup(
+ blueprints=[],
+ messages=[HumanMessage("What is your name?")],
+ system_prompt="You are a helpful assistant named Johnny.",
+ )
+
+ assert "Johnny" in history[-1].content
+
+
+class Visualizer(Module):
+ @skill
+ def take_a_picture(self) -> Image:
+ """Takes a picture."""
+ return Image.from_file(get_data("cafe-smol.jpg")).to_rgb()
+
+
+@pytest.mark.integration
+def test_image(agent_setup):
+ history = agent_setup(
+ blueprints=[Visualizer.blueprint()],
+ messages=[
+ HumanMessage(
+ "What do you see? Take a picture using your camera and describe it. "
+ "Please mention one of the words which best match the image: "
+ "'stadium', 'cafe', 'battleship'."
+ )
+ ],
+ system_prompt="You are a helpful assistant that can use a camera to take pictures.",
+ )
+
+ response = history[-1].content.lower()
+ assert "cafe" in response
+ assert "stadium" not in response
+ assert "battleship" not in response
diff --git a/dimos/agents/mcp/test_mcp_client_unit.py b/dimos/agents/mcp/test_mcp_client_unit.py
new file mode 100644
index 0000000000..8cd888f851
--- /dev/null
+++ b/dimos/agents/mcp/test_mcp_client_unit.py
@@ -0,0 +1,145 @@
+# Copyright 2025-2026 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import annotations
+
+import json
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from dimos.agents.mcp.mcp_client import McpClient
+from dimos.utils.sequential_ids import SequentialIds
+
+
+def _mock_post(url: str, **kwargs: object) -> MagicMock:
+ """Return a fake httpx response based on the JSON-RPC method."""
+ body = kwargs.get("json") or (kwargs.get("content") and json.loads(kwargs["content"]))
+ assert isinstance(body, dict)
+ method = body["method"]
+ req_id = body["id"]
+
+ result: object
+ if method == "initialize":
+ result = {
+ "protocolVersion": "2024-11-05",
+ "capabilities": {"tools": {}},
+ "serverInfo": {"name": "dimensional", "version": "1.0.0"},
+ }
+ elif method == "tools/list":
+ result = {
+ "tools": [
+ {
+ "name": "add",
+ "description": "Add two numbers",
+ "inputSchema": {
+ "type": "object",
+ "properties": {
+ "x": {"type": "integer"},
+ "y": {"type": "integer"},
+ },
+ "required": ["x", "y"],
+ },
+ },
+ {
+ "name": "greet",
+ "description": "Say hello",
+ "inputSchema": {
+ "type": "object",
+ "properties": {
+ "name": {"type": "string"},
+ },
+ },
+ },
+ ]
+ }
+ elif method == "tools/call":
+ name = body["params"]["name"]
+ args = body["params"].get("arguments", {})
+ if name == "add":
+ text = str(args.get("x", 0) + args.get("y", 0))
+ elif name == "greet":
+ text = f"Hello, {args.get('name', 'world')}!"
+ else:
+ text = "Skill not found"
+ result = {"content": [{"type": "text", "text": text}]}
+ else:
+ resp = MagicMock()
+ resp.status_code = 200
+ resp.raise_for_status = MagicMock()
+ resp.json.return_value = {
+ "jsonrpc": "2.0",
+ "id": req_id,
+ "error": {"code": -32601, "message": f"Unknown: {method}"},
+ }
+ return resp
+
+ resp = MagicMock()
+ resp.status_code = 200
+ resp.raise_for_status = MagicMock()
+ resp.json.return_value = {"jsonrpc": "2.0", "id": req_id, "result": result}
+ return resp
+
+
+@pytest.fixture
+def mcp_client() -> McpClient:
+ """Build an McpClient wired to the mock MCP post handler."""
+ mock_http = MagicMock()
+ mock_http.post.side_effect = _mock_post
+
+ with patch("dimos.agents.mcp.mcp_client.httpx.Client", return_value=mock_http):
+ client = McpClient.__new__(McpClient)
+
+ client._http_client = mock_http
+ client._seq_ids = SequentialIds()
+ client.config = MagicMock()
+ client.config.mcp_server_url = "http://localhost:9990/mcp"
+ return client
+
+
+def test_fetch_tools_from_mcp_server(mcp_client: McpClient) -> None:
+ tools = mcp_client._fetch_tools()
+
+ assert len(tools) == 2
+ assert tools[0].name == "add"
+ assert tools[1].name == "greet"
+
+
+def test_tool_invocation_via_mcp(mcp_client: McpClient) -> None:
+ tools = mcp_client._fetch_tools()
+ add_tool = next(t for t in tools if t.name == "add")
+ greet_tool = next(t for t in tools if t.name == "greet")
+
+ assert add_tool.func(x=2, y=3) == "5"
+ assert greet_tool.func(name="Alice") == "Hello, Alice!"
+
+
+def test_mcp_request_error_propagation(mcp_client: McpClient) -> None:
+ def error_post(url: str, **kwargs: object) -> MagicMock:
+ resp = MagicMock()
+ resp.status_code = 200
+ resp.raise_for_status = MagicMock()
+ resp.json.return_value = {
+ "jsonrpc": "2.0",
+ "id": 1,
+ "error": {"code": -32601, "message": "Unknown: bad/method"},
+ }
+ return resp
+
+ mcp_client._http_client.post.side_effect = error_post
+
+ try:
+ mcp_client._mcp_request("bad/method")
+ raise AssertionError("Expected RuntimeError")
+ except RuntimeError as e:
+ assert "Unknown: bad/method" in str(e)
diff --git a/dimos/protocol/mcp/test_mcp_module.py b/dimos/agents/mcp/test_mcp_server.py
similarity index 62%
rename from dimos/protocol/mcp/test_mcp_module.py
rename to dimos/agents/mcp/test_mcp_server.py
index 050e24f13b..1cbca9e3e4 100644
--- a/dimos/protocol/mcp/test_mcp_module.py
+++ b/dimos/agents/mcp/test_mcp_server.py
@@ -16,34 +16,25 @@
import asyncio
import json
-from pathlib import Path
from unittest.mock import MagicMock
+from dimos.agents.mcp.mcp_server import handle_request
from dimos.core.module import SkillInfo
-from dimos.protocol.mcp.mcp import MCPModule
-def _make_mcp(skills: list[SkillInfo], call_results: dict[str, object]) -> MCPModule:
- """Create an MCPModule with pre-populated skills and mock RPC calls."""
- mcp = MCPModule.__new__(MCPModule)
- mcp._skills = skills
- mcp._rpc_calls = {}
+def _make_rpc_calls(
+ skills: list[SkillInfo], call_results: dict[str, object]
+) -> dict[str, MagicMock]:
+ """Create mock RPC calls for the given skills."""
+ rpc_calls: dict[str, MagicMock] = {}
for skill in skills:
mock_call = MagicMock()
if skill.func_name in call_results:
mock_call.return_value = call_results[skill.func_name]
else:
mock_call.return_value = None
- mcp._rpc_calls[skill.func_name] = mock_call
- return mcp
-
-
-def test_unitree_blueprint_has_mcp() -> None:
- contents = Path(
- "dimos/robot/unitree/go2/blueprints/agentic/unitree_go2_agentic_mcp.py"
- ).read_text()
- assert "agentic_mcp" in contents
- assert "MCPModule.blueprint()" in contents
+ rpc_calls[skill.func_name] = mock_call
+ return rpc_calls
def test_mcp_module_request_flow() -> None:
@@ -56,20 +47,21 @@ def test_mcp_module_request_flow() -> None:
}
)
skills = [SkillInfo(class_name="TestSkills", func_name="add", args_schema=schema)]
+ rpc_calls = _make_rpc_calls(skills, {"add": 5})
- mcp = _make_mcp(skills, {"add": 5})
-
- response = asyncio.run(mcp._handle_request({"method": "tools/list", "id": 1}))
+ response = asyncio.run(handle_request({"method": "tools/list", "id": 1}, skills, rpc_calls))
assert response["result"]["tools"][0]["name"] == "add"
assert response["result"]["tools"][0]["description"] == "Add two numbers"
response = asyncio.run(
- mcp._handle_request(
+ handle_request(
{
"method": "tools/call",
"id": 2,
"params": {"name": "add", "arguments": {"x": 2, "y": 3}},
- }
+ },
+ skills,
+ rpc_calls,
)
)
assert response["result"]["content"][0]["text"] == "5"
@@ -82,49 +74,40 @@ def test_mcp_module_handles_errors() -> None:
SkillInfo(class_name="TestSkills", func_name="fail_skill", args_schema=schema),
]
- mcp = _make_mcp(skills, {"ok_skill": "done"})
- mcp._rpc_calls["fail_skill"] = MagicMock(side_effect=RuntimeError("boom"))
+ rpc_calls = _make_rpc_calls(skills, {"ok_skill": "done"})
+ rpc_calls["fail_skill"] = MagicMock(side_effect=RuntimeError("boom"))
# All skills listed
- response = asyncio.run(mcp._handle_request({"method": "tools/list", "id": 1}))
+ response = asyncio.run(handle_request({"method": "tools/list", "id": 1}, skills, rpc_calls))
tool_names = {tool["name"] for tool in response["result"]["tools"]}
assert "ok_skill" in tool_names
assert "fail_skill" in tool_names
# Error skill returns error text
response = asyncio.run(
- mcp._handle_request(
- {"method": "tools/call", "id": 2, "params": {"name": "fail_skill", "arguments": {}}}
+ handle_request(
+ {"method": "tools/call", "id": 2, "params": {"name": "fail_skill", "arguments": {}}},
+ skills,
+ rpc_calls,
)
)
- assert "Error:" in response["result"]["content"][0]["text"]
+ assert "Error running tool" in response["result"]["content"][0]["text"]
assert "boom" in response["result"]["content"][0]["text"]
# Unknown skill returns not found
response = asyncio.run(
- mcp._handle_request(
- {"method": "tools/call", "id": 3, "params": {"name": "no_such", "arguments": {}}}
+ handle_request(
+ {"method": "tools/call", "id": 3, "params": {"name": "no_such", "arguments": {}}},
+ skills,
+ rpc_calls,
)
)
assert "not found" in response["result"]["content"][0]["text"].lower()
def test_mcp_module_initialize_and_unknown() -> None:
- mcp = _make_mcp([], {})
-
- response = asyncio.run(mcp._handle_request({"method": "initialize", "id": 1}))
+ response = asyncio.run(handle_request({"method": "initialize", "id": 1}, [], {}))
assert response["result"]["serverInfo"]["name"] == "dimensional"
- response = asyncio.run(mcp._handle_request({"method": "unknown/method", "id": 2}))
+ response = asyncio.run(handle_request({"method": "unknown/method", "id": 2}, [], {}))
assert response["error"]["code"] == -32601
-
-
-def test_mcp_module_invalid_tool_name() -> None:
- mcp = _make_mcp([], {})
-
- response = asyncio.run(
- mcp._handle_request(
- {"method": "tools/call", "id": 1, "params": {"name": 123, "arguments": {}}}
- )
- )
- assert response["error"]["code"] == -32602
diff --git a/dimos/control/blueprints.py b/dimos/control/blueprints.py
index 8762ebd95b..5c33928e79 100644
--- a/dimos/control/blueprints.py
+++ b/dimos/control/blueprints.py
@@ -30,10 +30,15 @@
from __future__ import annotations
-from dimos.control.components import HardwareComponent, HardwareType, make_joints
+from dimos.control.components import (
+ HardwareComponent,
+ HardwareType,
+ make_joints,
+ make_twist_base_joints,
+)
from dimos.control.coordinator import TaskConfig, control_coordinator
from dimos.core.transport import LCMTransport
-from dimos.msgs.geometry_msgs import PoseStamped
+from dimos.msgs.geometry_msgs import PoseStamped, Twist
from dimos.msgs.sensor_msgs import JointState
from dimos.teleop.quest.quest_types import Buttons
from dimos.utils.data import LfsPath
@@ -594,6 +599,80 @@
)
+# =============================================================================
+# Twist Base Blueprints (velocity-commanded platforms)
+# =============================================================================
+
+# Mock holonomic twist base (3-DOF: vx, vy, wz)
+_base_joints = make_twist_base_joints("base")
+coordinator_mock_twist_base = control_coordinator(
+ hardware=[
+ HardwareComponent(
+ hardware_id="base",
+ hardware_type=HardwareType.BASE,
+ joints=_base_joints,
+ adapter_type="mock_twist_base",
+ ),
+ ],
+ tasks=[
+ TaskConfig(
+ name="vel_base",
+ type="velocity",
+ joint_names=_base_joints,
+ priority=10,
+ ),
+ ],
+).transports(
+ {
+ ("joint_state", JointState): LCMTransport("/coordinator/joint_state", JointState),
+ ("twist_command", Twist): LCMTransport("/cmd_vel", Twist),
+ }
+)
+
+
+# =============================================================================
+# Mobile Manipulation Blueprints (arm + twist base)
+# =============================================================================
+
+# Mock arm (7-DOF) + mock holonomic base (3-DOF)
+_mm_base_joints = make_twist_base_joints("base")
+coordinator_mobile_manip_mock = control_coordinator(
+ hardware=[
+ HardwareComponent(
+ hardware_id="arm",
+ hardware_type=HardwareType.MANIPULATOR,
+ joints=make_joints("arm", 7),
+ adapter_type="mock",
+ ),
+ HardwareComponent(
+ hardware_id="base",
+ hardware_type=HardwareType.BASE,
+ joints=_mm_base_joints,
+ adapter_type="mock_twist_base",
+ ),
+ ],
+ tasks=[
+ TaskConfig(
+ name="traj_arm",
+ type="trajectory",
+ joint_names=[f"arm_joint{i + 1}" for i in range(7)],
+ priority=10,
+ ),
+ TaskConfig(
+ name="vel_base",
+ type="velocity",
+ joint_names=_mm_base_joints,
+ priority=10,
+ ),
+ ],
+).transports(
+ {
+ ("joint_state", JointState): LCMTransport("/coordinator/joint_state", JointState),
+ ("twist_command", Twist): LCMTransport("/cmd_vel", Twist),
+ }
+)
+
+
# =============================================================================
# Raw Blueprints (for programmatic setup)
# =============================================================================
@@ -624,8 +703,12 @@
# Dual arm
"coordinator_dual_mock",
"coordinator_dual_xarm",
+ # Mobile manipulation
+ "coordinator_mobile_manip_mock",
# Single arm
"coordinator_mock",
+ # Twist base
+ "coordinator_mock_twist_base",
"coordinator_piper",
"coordinator_piper_xarm",
# Teleop IK
diff --git a/dimos/control/components.py b/dimos/control/components.py
index e3022468ed..8157a288d2 100644
--- a/dimos/control/components.py
+++ b/dimos/control/components.py
@@ -71,7 +71,41 @@ def make_joints(hardware_id: HardwareId, dof: int) -> list[JointName]:
return [f"{hardware_id}_joint{i + 1}" for i in range(dof)]
+# Maps virtual joint suffix → (Twist group, Twist field)
+TWIST_SUFFIX_MAP: dict[str, tuple[str, str]] = {
+ "vx": ("linear", "x"),
+ "vy": ("linear", "y"),
+ "vz": ("linear", "z"),
+ "wx": ("angular", "x"),
+ "wy": ("angular", "y"),
+ "wz": ("angular", "z"),
+}
+
+_DEFAULT_TWIST_SUFFIXES = ["vx", "vy", "wz"]
+
+
+def make_twist_base_joints(
+ hardware_id: HardwareId,
+ suffixes: list[str] | None = None,
+) -> list[JointName]:
+ """Create virtual joint names for a twist base.
+
+ Args:
+ hardware_id: The hardware identifier (e.g., "base")
+ suffixes: Velocity DOF suffixes. Defaults to ["vx", "vy", "wz"] (holonomic).
+
+ Returns:
+ List of joint names like ["base_vx", "base_vy", "base_wz"]
+ """
+ suffixes = suffixes or _DEFAULT_TWIST_SUFFIXES
+ for s in suffixes:
+ if s not in TWIST_SUFFIX_MAP:
+ raise ValueError(f"Unknown twist suffix '{s}'. Valid: {list(TWIST_SUFFIX_MAP)}")
+ return [f"{hardware_id}_{s}" for s in suffixes]
+
+
__all__ = [
+ "TWIST_SUFFIX_MAP",
"HardwareComponent",
"HardwareId",
"HardwareType",
@@ -79,4 +113,5 @@ def make_joints(hardware_id: HardwareId, dof: int) -> list[JointName]:
"JointState",
"TaskName",
"make_joints",
+ "make_twist_base_joints",
]
diff --git a/dimos/control/coordinator.py b/dimos/control/coordinator.py
index 5685a9f9c7..c9182e6aa8 100644
--- a/dimos/control/coordinator.py
+++ b/dimos/control/coordinator.py
@@ -32,19 +32,32 @@
import time
from typing import TYPE_CHECKING, Any
-from dimos.control.components import HardwareComponent, HardwareId, JointName, TaskName
-from dimos.control.hardware_interface import ConnectedHardware
+from dimos.control.components import (
+ TWIST_SUFFIX_MAP,
+ HardwareComponent,
+ HardwareId,
+ HardwareType,
+ JointName,
+ TaskName,
+)
+from dimos.control.hardware_interface import ConnectedHardware, ConnectedTwistBase
from dimos.control.task import ControlTask
from dimos.control.tick_loop import TickLoop
from dimos.core import In, Module, Out, rpc
from dimos.core.module import ModuleConfig
+from dimos.hardware.drive_trains.spec import (
+ TwistBaseAdapter,
+)
from dimos.msgs.geometry_msgs import (
PoseStamped, # noqa: TC001 - needed at runtime for In[PoseStamped]
+ Twist, # noqa: TC001 - needed at runtime for In[Twist]
)
from dimos.msgs.sensor_msgs import (
- JointState, # noqa: TC001 - needed at runtime for Out[JointState]
+ JointState,
+)
+from dimos.teleop.quest.quest_types import (
+ Buttons, # noqa: TC001 - needed at runtime for In[Buttons]
)
-from dimos.teleop.quest.quest_types import Buttons # noqa: TC001 - needed for teleop buttons
from dimos.utils.logging_config import setup_logger
if TYPE_CHECKING:
@@ -148,6 +161,9 @@ class ControlCoordinator(Module[ControlCoordinatorConfig]):
# Uses frame_id as task name for routing
cartesian_command: In[PoseStamped]
+ # Input: Streaming twist commands for velocity-commanded platforms
+ twist_command: In[Twist]
+
# Input: Teleop buttons for engage/disengage signaling
buttons: In[Buttons]
@@ -174,6 +190,7 @@ def __init__(self, *args: Any, **kwargs: Any) -> None:
# Subscription handles for streaming commands
self._joint_command_unsub: Callable[[], None] | None = None
self._cartesian_command_unsub: Callable[[], None] | None = None
+ self._twist_command_unsub: Callable[[], None] | None = None
self._buttons_unsub: Callable[[], None] | None = None
logger.info(f"ControlCoordinator initialized at {self.config.tick_rate}Hz")
@@ -206,7 +223,11 @@ def _setup_from_config(self) -> None:
def _setup_hardware(self, component: HardwareComponent) -> None:
"""Connect and add a single hardware adapter."""
- adapter = self._create_adapter(component)
+ adapter: ManipulatorAdapter | TwistBaseAdapter
+ if component.hardware_type == HardwareType.BASE:
+ adapter = self._create_twist_base_adapter(component)
+ else:
+ adapter = self._create_adapter(component)
if not adapter.connect():
raise RuntimeError(f"Failed to connect to {component.adapter_type} adapter")
@@ -230,6 +251,16 @@ def _create_adapter(self, component: HardwareComponent) -> ManipulatorAdapter:
address=component.address,
)
+ def _create_twist_base_adapter(self, component: HardwareComponent) -> TwistBaseAdapter:
+ """Create a twist base adapter from component config."""
+ from dimos.hardware.drive_trains.registry import twist_base_adapter_registry
+
+ return twist_base_adapter_registry.create(
+ component.adapter_type,
+ dof=len(component.joints),
+ address=component.address,
+ )
+
def _create_task_from_config(self, cfg: TaskConfig) -> ControlTask:
"""Create a control task from config."""
task_type = cfg.type.lower()
@@ -310,19 +341,34 @@ def _create_task_from_config(self, cfg: TaskConfig) -> ControlTask:
@rpc
def add_hardware(
self,
- adapter: ManipulatorAdapter,
+ adapter: ManipulatorAdapter | TwistBaseAdapter,
component: HardwareComponent,
) -> bool:
"""Register a hardware adapter with the coordinator."""
+ is_base = component.hardware_type == HardwareType.BASE
+ if is_base != isinstance(adapter, TwistBaseAdapter):
+ raise TypeError(
+ f"Hardware type / adapter mismatch for '{component.hardware_id}': "
+ f"hardware_type={component.hardware_type.value} but got "
+ f"{type(adapter).__name__}"
+ )
+
with self._hardware_lock:
if component.hardware_id in self._hardware:
logger.warning(f"Hardware {component.hardware_id} already registered")
return False
- connected = ConnectedHardware(
- adapter=adapter,
- component=component,
- )
+ if isinstance(adapter, TwistBaseAdapter):
+ connected: ConnectedHardware = ConnectedTwistBase(
+ adapter=adapter,
+ component=component,
+ )
+ else:
+ connected = ConnectedHardware(
+ adapter=adapter,
+ component=component,
+ )
+
self._hardware[component.hardware_id] = connected
for joint_name in connected.joint_names:
@@ -490,6 +536,34 @@ def _on_cartesian_command(self, msg: PoseStamped) -> None:
task.on_cartesian_command(msg, t_now)
+ def _on_twist_command(self, msg: Twist) -> None:
+ """Convert Twist → virtual joint velocities and route via _on_joint_command.
+
+ Maps Twist fields to virtual joints using suffix convention:
+ base_vx → linear.x, base_vy → linear.y, base_wz → angular.z, etc.
+ """
+ names: list[str] = []
+ velocities: list[float] = []
+
+ with self._hardware_lock:
+ for hw in self._hardware.values():
+ if hw.component.hardware_type != HardwareType.BASE:
+ continue
+ for joint_name in hw.joint_names:
+ # Extract suffix (e.g., "base_vx" → "vx")
+ suffix = joint_name.rsplit("_", 1)[-1]
+ mapping = TWIST_SUFFIX_MAP.get(suffix)
+ if mapping is None:
+ continue
+ group, axis = mapping
+ value = getattr(getattr(msg, group), axis)
+ names.append(joint_name)
+ velocities.append(value)
+
+ if names:
+ joint_state = JointState(name=names, velocity=velocities)
+ self._on_joint_command(joint_state)
+
def _on_buttons(self, msg: Buttons) -> None:
"""Forward button state to all tasks."""
with self._task_lock:
@@ -536,6 +610,9 @@ def set_gripper_position(self, hardware_id: str, position: float) -> bool:
if hw is None:
logger.warning(f"Hardware '{hardware_id}' not found for gripper command")
return False
+ if isinstance(hw, ConnectedTwistBase):
+ logger.warning(f"Hardware '{hardware_id}' is a twist base, no gripper support")
+ return False
return hw.adapter.write_gripper_position(position)
@rpc
@@ -549,6 +626,8 @@ def get_gripper_position(self, hardware_id: str) -> float | None:
hw = self._hardware.get(hardware_id)
if hw is None:
return None
+ if isinstance(hw, ConnectedTwistBase):
+ return None
return hw.adapter.read_gripper_position()
# =========================================================================
@@ -610,6 +689,18 @@ def start(self) -> None:
"Use task_invoke RPC or set transport via blueprint."
)
+ # Subscribe to twist commands if any twist base hardware configured
+ has_twist_base = any(c.hardware_type == HardwareType.BASE for c in self.config.hardware)
+ if has_twist_base:
+ try:
+ self._twist_command_unsub = self.twist_command.subscribe(self._on_twist_command)
+ logger.info("Subscribed to twist_command for twist base control")
+ except Exception:
+ logger.warning(
+ "Twist base configured but could not subscribe to twist_command. "
+ "Use task_invoke RPC or set transport via blueprint."
+ )
+
# Subscribe to buttons if any teleop_ik tasks configured (engage/disengage)
has_teleop_ik = any(t.type == "teleop_ik" for t in self.config.tasks)
if has_teleop_ik:
@@ -630,6 +721,9 @@ def stop(self) -> None:
if self._cartesian_command_unsub:
self._cartesian_command_unsub()
self._cartesian_command_unsub = None
+ if self._twist_command_unsub:
+ self._twist_command_unsub()
+ self._twist_command_unsub = None
if self._buttons_unsub:
self._buttons_unsub()
self._buttons_unsub = None
diff --git a/dimos/control/examples/twist_base_keyboard_teleop.py b/dimos/control/examples/twist_base_keyboard_teleop.py
new file mode 100644
index 0000000000..2d7651145a
--- /dev/null
+++ b/dimos/control/examples/twist_base_keyboard_teleop.py
@@ -0,0 +1,59 @@
+# Copyright 2025-2026 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Keyboard teleop for twist base via ControlCoordinator.
+
+Runs a mock holonomic twist base with pygame keyboard control.
+WASD keys publish Twist → coordinator's twist_command port → virtual joints
+→ tick loop → MockTwistBaseAdapter.
+
+Controls:
+ W/S: Forward/backward (linear.x)
+ Q/E: Strafe left/right (linear.y)
+ A/D: Turn left/right (angular.z)
+ Shift: 2x boost
+ Ctrl: 0.5x slow
+ Space: Emergency stop
+ ESC: Quit
+
+Usage:
+ python -m dimos.control.examples.twist_base_keyboard_teleop
+"""
+
+from __future__ import annotations
+
+from dimos.control.blueprints import coordinator_mock_twist_base
+from dimos.robot.unitree.keyboard_teleop import keyboard_teleop
+
+
+def main() -> None:
+ """Run mock twist base + keyboard teleop."""
+ coord = coordinator_mock_twist_base.build()
+ teleop = keyboard_teleop().build()
+
+ print("Starting mock twist base coordinator + keyboard teleop...")
+ print("Coordinator tick loop: 100Hz")
+ print("Keyboard teleop: 50Hz on /cmd_vel")
+ print()
+
+ coord.start()
+ teleop.start()
+
+ # Block until Ctrl+C → loop() handles KeyboardInterrupt and calls stop()
+ coord.loop()
+ teleop.stop()
+
+
+if __name__ == "__main__":
+ main()
diff --git a/dimos/control/hardware_interface.py b/dimos/control/hardware_interface.py
index 9f6eb99851..4e5d1d634c 100644
--- a/dimos/control/hardware_interface.py
+++ b/dimos/control/hardware_interface.py
@@ -14,10 +14,12 @@
"""Connected hardware for the ControlCoordinator.
-Wraps ManipulatorAdapter with coordinator-specific features:
-- Namespaced joint names (e.g., "left_joint1")
-- Unified read/write interface
-- Hold-last-value for partial commands
+Provides two wrapper types:
+- ConnectedHardware: Wraps ManipulatorAdapter for joint-controlled arms
+- ConnectedTwistBase: Wraps TwistBaseAdapter for velocity-commanded platforms
+
+Both share the same duck-type interface (read_state, write_command, etc.)
+so the tick loop treats them uniformly.
"""
from __future__ import annotations
@@ -30,6 +32,7 @@
if TYPE_CHECKING:
from dimos.control.components import HardwareComponent, HardwareId, JointName, JointState
+ from dimos.hardware.drive_trains.spec import TwistBaseAdapter
logger = logging.getLogger(__name__)
@@ -193,6 +196,98 @@ def _build_ordered_command(self) -> list[float]:
return [self._last_commanded[name] for name in self._joint_names]
+class ConnectedTwistBase(ConnectedHardware):
+ """Runtime wrapper for a twist base connected to the coordinator.
+
+ Inherits from ConnectedHardware and overrides behavior for
+ velocity-commanded platforms (holonomic bases, drones, quadrupeds, etc.).
+
+ Key differences from ConnectedHardware:
+ - Positions come from odometry (or zeros if unavailable)
+ - Efforts are always zero
+ - write_command always sends velocities regardless of mode
+ - No retry loop for initialization (twist bases start at zero velocity)
+ """
+
+ _twist_adapter: TwistBaseAdapter
+
+ def __init__(
+ self,
+ adapter: TwistBaseAdapter,
+ component: HardwareComponent,
+ ) -> None:
+ from dimos.hardware.drive_trains.spec import TwistBaseAdapter as TwistBaseAdapterProto
+
+ if not isinstance(adapter, TwistBaseAdapterProto):
+ raise TypeError("adapter must implement TwistBaseAdapter")
+
+ self._twist_adapter = adapter
+ self._component = component
+ self._joint_names = component.joints
+
+ # Twist bases start at zero velocity → no need to read from hardware
+ self._last_commanded: dict[str, float] = {name: 0.0 for name in self._joint_names}
+ self._initialized = True
+ self._warned_unknown_joints: set[str] = set()
+ self._current_mode: ControlMode | None = None
+
+ @property
+ def adapter(self) -> TwistBaseAdapter: # type: ignore[override]
+ """The underlying twist base adapter."""
+ return self._twist_adapter
+
+ def disconnect(self) -> None:
+ """Disconnect the underlying adapter."""
+ self._twist_adapter.disconnect()
+
+ def read_state(self) -> dict[JointName, JointState]:
+ """Read state as {joint_name: JointState}.
+
+ Positions come from odometry (zeros if unavailable).
+ Velocities from adapter. Efforts are always zero.
+ """
+ from dimos.control.components import JointState
+
+ velocities = self._twist_adapter.read_velocities()
+ odometry = self._twist_adapter.read_odometry()
+ positions = odometry if odometry is not None else [0.0] * self.dof
+
+ return {
+ name: JointState(
+ position=positions[i],
+ velocity=velocities[i],
+ effort=0.0,
+ )
+ for i, name in enumerate(self._joint_names)
+ }
+
+ def write_command(self, commands: dict[str, float], _mode: ControlMode) -> bool:
+ """Write velocity commands → always sends velocities regardless of mode.
+
+ Args:
+ commands: {joint_name: velocity} - can be partial
+ _mode: Control mode (ignored → twist bases always use velocity)
+
+ Returns:
+ True if command was sent successfully
+ """
+ # Update last commanded for joints we received
+ for joint_name, value in commands.items():
+ if joint_name in self._last_commanded:
+ self._last_commanded[joint_name] = value
+ elif joint_name not in self._warned_unknown_joints:
+ logger.warning(
+ f"TwistBase {self.hardware_id} received command for unknown joint "
+ f"{joint_name}. Valid joints: {self._joint_names}"
+ )
+ self._warned_unknown_joints.add(joint_name)
+
+ # Build ordered velocity list and send
+ ordered = self._build_ordered_command()
+ return self._twist_adapter.write_velocities(ordered)
+
+
__all__ = [
"ConnectedHardware",
+ "ConnectedTwistBase",
]
diff --git a/dimos/environment/environment.py b/dimos/environment/environment.py
deleted file mode 100644
index ba1923b765..0000000000
--- a/dimos/environment/environment.py
+++ /dev/null
@@ -1,178 +0,0 @@
-# Copyright 2025-2026 Dimensional Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from abc import ABC, abstractmethod
-
-import numpy as np
-
-
-class Environment(ABC):
- def __init__(self) -> None:
- self.environment_type = None
- self.graph = None
-
- @abstractmethod
- def label_objects(self) -> list[str]:
- """
- Label all objects in the environment.
-
- Returns:
- A list of string labels representing the objects in the environment.
- """
- pass
-
- @abstractmethod
- def get_visualization(self, format_type): # type: ignore[no-untyped-def]
- """Return different visualization formats like images, NERFs, or other 3D file types."""
- pass
-
- @abstractmethod
- def generate_segmentations( # type: ignore[no-untyped-def]
- self, model: str | None = None, objects: list[str] | None = None, *args, **kwargs
- ) -> list[np.ndarray]: # type: ignore[type-arg]
- """
- Generate object segmentations of objects[] using neural methods.
-
- Args:
- model (str, optional): The string of the desired segmentation model (SegmentAnything, etc.)
- objects (list[str], optional): The list of strings of the specific objects to segment.
- *args: Variable length argument list.
- **kwargs: Arbitrary keyword arguments.
-
- Returns:
- list of numpy.ndarray: A list where each element is a numpy array
- representing a binary mask for a segmented area of an object in the environment.
-
- Note:
- The specific arguments and their usage will depend on the subclass implementation.
- """
- pass
-
- @abstractmethod
- def get_segmentations(self) -> list[np.ndarray]: # type: ignore[type-arg]
- """
- Get segmentations using a method like 'segment anything'.
-
- Returns:
- list of numpy.ndarray: A list where each element is a numpy array
- representing a binary mask for a segmented area of an object in the environment.
- """
- pass
-
- @abstractmethod
- def generate_point_cloud(self, object: str | None = None, *args, **kwargs) -> np.ndarray: # type: ignore[no-untyped-def, type-arg]
- """
- Generate a point cloud for the entire environment or a specific object.
-
- Args:
- object (str, optional): The string of the specific object to get the point cloud for.
- If None, returns the point cloud for the entire environment.
- *args: Variable length argument list.
- **kwargs: Arbitrary keyword arguments.
-
- Returns:
- np.ndarray: A numpy array representing the generated point cloud.
- Shape: (n, 3) where n is the number of points and each point is [x, y, z].
-
- Note:
- The specific arguments and their usage will depend on the subclass implementation.
- """
- pass
-
- @abstractmethod
- def get_point_cloud(self, object: str | None = None) -> np.ndarray: # type: ignore[type-arg]
- """
- Return point clouds of the entire environment or a specific object.
-
- Args:
- object (str, optional): The string of the specific object to get the point cloud for. If None, returns the point cloud for the entire environment.
-
- Returns:
- np.ndarray: A numpy array representing the point cloud.
- Shape: (n, 3) where n is the number of points and each point is [x, y, z].
- """
- pass
-
- @abstractmethod
- def generate_depth_map( # type: ignore[no-untyped-def]
- self,
- stereo: bool | None = None,
- monocular: bool | None = None,
- model: str | None = None,
- *args,
- **kwargs,
- ) -> np.ndarray: # type: ignore[type-arg]
- """
- Generate a depth map using monocular or stereo camera methods.
-
- Args:
- stereo (bool, optional): Whether to stereo camera is avaliable for ground truth depth map generation.
- monocular (bool, optional): Whether to use monocular camera for neural depth map generation.
- model (str, optional): The string of the desired monocular depth model (Metric3D, ZoeDepth, etc.)
- *args: Variable length argument list.
- **kwargs: Arbitrary keyword arguments.
-
- Returns:
- np.ndarray: A 2D numpy array representing the generated depth map.
- Shape: (height, width) where each value represents the depth
- at that pixel location.
-
- Note:
- The specific arguments and their usage will depend on the subclass implementation.
- """
- pass
-
- @abstractmethod
- def get_depth_map(self) -> np.ndarray: # type: ignore[type-arg]
- """
- Return a depth map of the environment.
-
- Returns:
- np.ndarray: A 2D numpy array representing the depth map.
- Shape: (height, width) where each value represents the depth
- at that pixel location. Typically, closer objects have smaller
- values and farther objects have larger values.
-
- Note:
- The exact range and units of the depth values may vary depending on the
- specific implementation and the sensor or method used to generate the depth map.
- """
- pass
-
- def initialize_from_images(self, images): # type: ignore[no-untyped-def]
- """Initialize the environment from a set of image frames or video."""
- raise NotImplementedError("This method is not implemented for this environment type.")
-
- def initialize_from_file(self, file_path): # type: ignore[no-untyped-def]
- """Initialize the environment from a spatial file type.
-
- Supported file types include:
- - GLTF/GLB (GL Transmission Format)
- - FBX (Filmbox)
- - OBJ (Wavefront Object)
- - USD/USDA/USDC (Universal Scene Description)
- - STL (Stereolithography)
- - COLLADA (DAE)
- - Alembic (ABC)
- - PLY (Polygon File Format)
- - 3DS (3D Studio)
- - VRML/X3D (Virtual Reality Modeling Language)
-
- Args:
- file_path (str): Path to the spatial file.
-
- Raises:
- NotImplementedError: If the method is not implemented for this environment type.
- """
- raise NotImplementedError("This method is not implemented for this environment type.")
diff --git a/dimos/hardware/drive_trains/__init__.py b/dimos/hardware/drive_trains/__init__.py
new file mode 100644
index 0000000000..c6e843feea
--- /dev/null
+++ b/dimos/hardware/drive_trains/__init__.py
@@ -0,0 +1,15 @@
+# Copyright 2025-2026 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Drive train hardware adapters for velocity-commanded platforms."""
diff --git a/dimos/hardware/drive_trains/flowbase/__init__.py b/dimos/hardware/drive_trains/flowbase/__init__.py
new file mode 100644
index 0000000000..25f95e399c
--- /dev/null
+++ b/dimos/hardware/drive_trains/flowbase/__init__.py
@@ -0,0 +1,15 @@
+# Copyright 2025-2026 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""FlowBase twist base adapter for holonomic base control via Portal RPC."""
diff --git a/dimos/hardware/drive_trains/flowbase/adapter.py b/dimos/hardware/drive_trains/flowbase/adapter.py
new file mode 100644
index 0000000000..5b5563792d
--- /dev/null
+++ b/dimos/hardware/drive_trains/flowbase/adapter.py
@@ -0,0 +1,206 @@
+# Copyright 2025-2026 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""FlowBase adapter — wraps Portal RPC client for holonomic base control.
+
+Frame convention: FlowBase uses inverted Y-axis compared to standard convention.
+We negate vy and wz when sending to the hardware.
+
+ Standard (ROS): FlowBase:
+ +Y -Y
+          │                     │
+    ───┼─── +X          ───┼─── +X
+ | |
+"""
+
+from __future__ import annotations
+
+import logging
+import threading
+from typing import TYPE_CHECKING
+
+import numpy as np
+
+if TYPE_CHECKING:
+ from dimos.hardware.drive_trains.registry import TwistBaseAdapterRegistry
+
+logger = logging.getLogger(__name__)
+
+
+class FlowBaseAdapter:
+ """TwistBaseAdapter implementation for FlowBase holonomic platform.
+
+ Communicates with FlowBase controller via Portal RPC over TCP.
+ Expects 3 DOF: [vx, vy, wz] (holonomic base).
+
+ Args:
+ dof: Number of velocity DOFs (must be 3 for FlowBase)
+ address: Portal RPC address as "host:port" (default: "172.6.2.20:11323")
+ """
+
+ def __init__(self, dof: int = 3, address: str | None = None, **_: object) -> None:
+ if dof != 3:
+ raise ValueError(f"FlowBase only supports 3 DOF (holonomic), got {dof}")
+
+ self._address = address or "172.6.2.20:11323"
+ self._client = None
+ self._connected = False
+ self._enabled = False
+ self._lock = threading.Lock()
+
+ # Last commanded velocities (in standard frame, before negation)
+ self._last_velocities = [0.0, 0.0, 0.0]
+
+ # =========================================================================
+ # Connection
+ # =========================================================================
+
+ def connect(self) -> bool:
+ """Connect to FlowBase controller via Portal RPC."""
+ try:
+ import portal # type: ignore[import-untyped]
+
+ self._client = portal.Client(self._address)
+ self._connected = True
+ logger.info(f"Connected to FlowBase at {self._address}")
+ return True
+ except Exception as e:
+ logger.error(f"Failed to connect to FlowBase at {self._address}: {e}")
+ self._connected = False
+ return False
+
+ def disconnect(self) -> None:
+ """Disconnect and send zero velocity."""
+ if self._connected and self._client:
+ try:
+ self._send_velocity(0.0, 0.0, 0.0)
+ except Exception:
+ pass
+ try:
+ self._client.close()
+ except Exception:
+ pass
+ self._connected = False
+ self._client = None
+
+ def is_connected(self) -> bool:
+ """Check if connected to FlowBase."""
+ return self._connected
+
+ # =========================================================================
+ # Info
+ # =========================================================================
+
+ def get_dof(self) -> int:
+ """FlowBase is always 3 DOF (vx, vy, wz)."""
+ return 3
+
+ # =========================================================================
+ # State Reading
+ # =========================================================================
+
+ def read_velocities(self) -> list[float]:
+ """Return last commanded velocities (FlowBase doesn't report actual)."""
+ with self._lock:
+ return self._last_velocities.copy()
+
+ def read_odometry(self) -> list[float] | None:
+ """Read odometry from FlowBase as [x, y, theta]."""
+ if not self._connected or not self._client:
+ return None
+
+ try:
+ with self._lock:
+ odom = self._client.get_odometry({}).result()
+
+ if odom is None:
+ return None
+
+ translation = odom["translation"] # [x, y]
+ rotation = odom["rotation"] # theta in radians
+ return [float(translation[0]), float(translation[1]), float(rotation)]
+ except Exception as e:
+ logger.error(f"Error reading FlowBase odometry: {e}")
+ return None
+
+ # =========================================================================
+ # Control
+ # =========================================================================
+
+ def write_velocities(self, velocities: list[float]) -> bool:
+ """Send velocity command to FlowBase.
+
+ Args:
+ velocities: [vx, vy, wz] in standard frame (m/s, rad/s)
+ """
+ if len(velocities) != 3:
+ return False
+
+ if not self._connected or not self._client:
+ return False
+
+ vx, vy, wz = velocities
+ with self._lock:
+ self._last_velocities = list(velocities)
+
+ # Negate vy and wz for FlowBase's inverted Y-axis frame
+ return self._send_velocity(vx, -vy, -wz)
+
+ def write_stop(self) -> bool:
+ """Stop all motion."""
+ with self._lock:
+ self._last_velocities = [0.0, 0.0, 0.0]
+ if not self._connected or not self._client:
+ return False
+ return self._send_velocity(0.0, 0.0, 0.0)
+
+ # =========================================================================
+ # Enable/Disable
+ # =========================================================================
+
+ def write_enable(self, enable: bool) -> bool:
+ """Enable/disable the platform (FlowBase is always enabled when connected)."""
+ self._enabled = enable
+ return True
+
+ def read_enabled(self) -> bool:
+ """Check if platform is enabled."""
+ return self._enabled
+
+ # =========================================================================
+ # Internal
+ # =========================================================================
+
+ def _send_velocity(self, vx: float, vy: float, wz: float) -> bool:
+ """Send raw velocity to FlowBase via Portal RPC."""
+ try:
+ command = {
+ "target_velocity": np.array([vx, vy, wz]),
+ "frame": "local",
+ }
+ with self._lock:
+ assert self._client is not None
+ self._client.set_target_velocity(command).result()
+ return True
+ except Exception as e:
+ logger.error(f"Error sending FlowBase velocity: {e}")
+ return False
+
+
+def register(registry: TwistBaseAdapterRegistry) -> None:
+ """Register this adapter with the registry."""
+ registry.register("flowbase", FlowBaseAdapter)
+
+
+__all__ = ["FlowBaseAdapter"]
diff --git a/dimos/protocol/mcp/__main__.py b/dimos/hardware/drive_trains/mock/__init__.py
similarity index 56%
rename from dimos/protocol/mcp/__main__.py
rename to dimos/hardware/drive_trains/mock/__init__.py
index a58e59d367..9b6f630040 100644
--- a/dimos/protocol/mcp/__main__.py
+++ b/dimos/hardware/drive_trains/mock/__init__.py
@@ -12,25 +12,19 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-"""CLI entry point for Dimensional MCP Bridge.
-
-Connects Claude Code (or other MCP clients) to a running DimOS agent.
+"""Mock twist base adapter for testing without hardware.
Usage:
- python -m dimos.protocol.mcp # Bridge to running DimOS on default port
+ >>> from dimos.hardware.drive_trains.mock import MockTwistBaseAdapter
+ >>> adapter = MockTwistBaseAdapter(dof=3)
+ >>> adapter.connect()
+ True
+ >>> adapter.write_velocities([0.5, 0.0, 0.1])
+ True
+ >>> adapter.read_velocities()
+ [0.5, 0.0, 0.1]
"""
-from __future__ import annotations
-
-import asyncio
-
-from dimos.protocol.mcp.bridge import main as bridge_main
-
-
-def main() -> None:
- """Main entry point - connects to running DimOS via bridge."""
- asyncio.run(bridge_main())
-
+from dimos.hardware.drive_trains.mock.adapter import MockTwistBaseAdapter
-if __name__ == "__main__":
- main()
+__all__ = ["MockTwistBaseAdapter"]
diff --git a/dimos/hardware/drive_trains/mock/adapter.py b/dimos/hardware/drive_trains/mock/adapter.py
new file mode 100644
index 0000000000..2091ec59d0
--- /dev/null
+++ b/dimos/hardware/drive_trains/mock/adapter.py
@@ -0,0 +1,137 @@
+# Copyright 2025-2026 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Mock twist base adapter for testing - no hardware required.
+
+Usage:
+ >>> from dimos.hardware.drive_trains.mock import MockTwistBaseAdapter
+ >>> adapter = MockTwistBaseAdapter(dof=3)
+ >>> adapter.connect()
+ True
+ >>> adapter.write_velocities([0.5, 0.0, 0.1])
+ True
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+ from dimos.hardware.drive_trains.registry import TwistBaseAdapterRegistry
+
+
+class MockTwistBaseAdapter:
+ """Fake twist base adapter for unit tests.
+
+ Implements TwistBaseAdapter protocol with in-memory state.
+ Useful for:
+ - Unit testing coordinator logic without hardware
+ - Integration testing with predictable behavior
+ - Development without a physical base
+ """
+
+ def __init__(self, dof: int = 3, **_: object) -> None:
+ self._dof = dof
+ self._velocities = [0.0] * dof
+ self._odometry: list[float] | None = [0.0] * dof
+ self._enabled = False
+ self._connected = False
+
+ # =========================================================================
+ # Connection
+ # =========================================================================
+
+ def connect(self) -> bool:
+ """Simulate connection."""
+ self._connected = True
+ return True
+
+ def disconnect(self) -> None:
+ """Simulate disconnection."""
+ self._connected = False
+
+ def is_connected(self) -> bool:
+ """Check mock connection status."""
+ return self._connected
+
+ # =========================================================================
+ # Info
+ # =========================================================================
+
+ def get_dof(self) -> int:
+ """Return DOF."""
+ return self._dof
+
+ # =========================================================================
+ # State Reading
+ # =========================================================================
+
+ def read_velocities(self) -> list[float]:
+ """Return mock velocities."""
+ return self._velocities.copy()
+
+ def read_odometry(self) -> list[float] | None:
+ """Return mock odometry."""
+ if self._odometry is None:
+ return None
+ return self._odometry.copy()
+
+ # =========================================================================
+ # Control
+ # =========================================================================
+
+ def write_velocities(self, velocities: list[float]) -> bool:
+ """Set mock velocities."""
+ if len(velocities) != self._dof:
+ return False
+ self._velocities = list(velocities)
+ return True
+
+ def write_stop(self) -> bool:
+ """Stop mock motion."""
+ self._velocities = [0.0] * self._dof
+ return True
+
+ # =========================================================================
+ # Enable/Disable
+ # =========================================================================
+
+ def write_enable(self, enable: bool) -> bool:
+ """Enable/disable mock platform."""
+ self._enabled = enable
+ return True
+
+ def read_enabled(self) -> bool:
+ """Check mock enable state."""
+ return self._enabled
+
+ # =========================================================================
+ # Test Helpers (not part of Protocol)
+ # =========================================================================
+
+ def set_odometry(self, odometry: list[float] | None) -> None:
+ """Set odometry directly for testing."""
+ self._odometry = list(odometry) if odometry is not None else None
+
+ def set_velocities_directly(self, velocities: list[float]) -> None:
+ """Set velocities directly for testing (bypasses DOF check)."""
+ self._velocities = list(velocities)
+
+
+def register(registry: TwistBaseAdapterRegistry) -> None:
+ """Register this adapter with the registry."""
+ registry.register("mock_twist_base", MockTwistBaseAdapter)
+
+
+__all__ = ["MockTwistBaseAdapter"]
diff --git a/dimos/hardware/drive_trains/registry.py b/dimos/hardware/drive_trains/registry.py
new file mode 100644
index 0000000000..0a513d2bd4
--- /dev/null
+++ b/dimos/hardware/drive_trains/registry.py
@@ -0,0 +1,98 @@
+# Copyright 2025-2026 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""TwistBase adapter registry with auto-discovery.
+
+Automatically discovers and registers twist base adapters from subpackages.
+Each adapter provides a `register()` function in its adapter.py module.
+
+Usage:
+ from dimos.hardware.drive_trains.registry import twist_base_adapter_registry
+
+ # Create an adapter by name
+ adapter = twist_base_adapter_registry.create("mock_twist_base", dof=3)
+ adapter = twist_base_adapter_registry.create("flowbase", dof=3, address="172.6.2.20:11323")
+
+ # List available adapters
+ print(twist_base_adapter_registry.available()) # ["flowbase", "mock_twist_base"]
+"""
+
+from __future__ import annotations
+
+import importlib
+import logging
+import pkgutil
+from typing import TYPE_CHECKING, Any
+
+if TYPE_CHECKING:
+ from dimos.hardware.drive_trains.spec import TwistBaseAdapter
+
+logger = logging.getLogger(__name__)
+
+
+class TwistBaseAdapterRegistry:
+ """Registry for twist base adapters with auto-discovery."""
+
+ def __init__(self) -> None:
+ self._adapters: dict[str, type[TwistBaseAdapter]] = {}
+
+ def register(self, name: str, cls: type[TwistBaseAdapter]) -> None:
+ """Register an adapter class."""
+ self._adapters[name.lower()] = cls
+
+ def create(self, name: str, **kwargs: Any) -> TwistBaseAdapter:
+ """Create an adapter instance by name.
+
+ Args:
+ name: Adapter name (e.g., "mock_twist_base", "flowbase")
+ **kwargs: Arguments passed to adapter constructor
+
+ Returns:
+ Configured adapter instance
+
+ Raises:
+ KeyError: If adapter name is not found
+ """
+ key = name.lower()
+ if key not in self._adapters:
+ raise KeyError(f"Unknown twist base adapter: {name}. Available: {self.available()}")
+
+ return self._adapters[key](**kwargs)
+
+ def available(self) -> list[str]:
+ """List available adapter names."""
+ return sorted(self._adapters.keys())
+
+ def discover(self) -> None:
+ """Discover and register adapters from subpackages.
+
+ Can be called multiple times to pick up newly added adapters.
+ """
+ import dimos.hardware.drive_trains as pkg
+
+ for _, name, ispkg in pkgutil.iter_modules(pkg.__path__):
+ if not ispkg:
+ continue
+ try:
+ module = importlib.import_module(f"dimos.hardware.drive_trains.{name}.adapter")
+ if hasattr(module, "register"):
+ module.register(self)
+ except ImportError as e:
+ logger.warning(f"Skipping twist base adapter {name}: {e}")
+
+
+twist_base_adapter_registry = TwistBaseAdapterRegistry()
+twist_base_adapter_registry.discover()
+
+__all__ = ["TwistBaseAdapterRegistry", "twist_base_adapter_registry"]
diff --git a/dimos/hardware/drive_trains/spec.py b/dimos/hardware/drive_trains/spec.py
new file mode 100644
index 0000000000..0b288edfd4
--- /dev/null
+++ b/dimos/hardware/drive_trains/spec.py
@@ -0,0 +1,95 @@
+# Copyright 2025-2026 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""TwistBase adapter protocol for velocity-commanded platforms.
+
+Lightweight protocol for mobile bases, quadrupeds, drones, RC cars,
+and any other platform that accepts Twist (velocity) commands.
+
+Virtual joint ordering is defined by the HardwareComponent.joints list.
+For a holonomic base: [vx, vy, wz] maps to joints ["base_vx", "base_vy", "base_wz"].
+"""
+
+from typing import Protocol, runtime_checkable
+
+
+@runtime_checkable
+class TwistBaseAdapter(Protocol):
+ """Protocol for velocity-commanded platform IO.
+
+ Implement this per vendor SDK. All methods use SI units:
+ - Linear velocity: m/s
+ - Angular velocity: rad/s
+ - Position: meters
+ - Angle: radians
+ """
+
+ # --- Connection ---
+
+ def connect(self) -> bool:
+ """Connect to hardware. Returns True on success."""
+ ...
+
+ def disconnect(self) -> None:
+ """Disconnect from hardware."""
+ ...
+
+ def is_connected(self) -> bool:
+ """Check if connected."""
+ ...
+
+ # --- Info ---
+
+ def get_dof(self) -> int:
+ """Get number of velocity DOFs (e.g., 3 for holonomic, 2 for differential)."""
+ ...
+
+ # --- State Reading ---
+
+ def read_velocities(self) -> list[float]:
+ """Read current velocities in virtual joint order (m/s or rad/s)."""
+ ...
+
+ def read_odometry(self) -> list[float] | None:
+ """Read position estimate in virtual joint order.
+
+ For a holonomic base this would be [x, y, theta].
+ Returns None if the platform doesn't provide odometry.
+ """
+ ...
+
+ # --- Control ---
+
+ def write_velocities(self, velocities: list[float]) -> bool:
+ """Command velocities in virtual joint order. Returns success."""
+ ...
+
+ def write_stop(self) -> bool:
+ """Stop all motion immediately (zero velocities)."""
+ ...
+
+ # --- Enable/Disable ---
+
+ def write_enable(self, enable: bool) -> bool:
+ """Enable or disable the platform. Returns success."""
+ ...
+
+ def read_enabled(self) -> bool:
+ """Check if platform is enabled."""
+ ...
+
+
+__all__ = [
+ "TwistBaseAdapter",
+]
diff --git a/dimos/models/depth/metric3d.py b/dimos/models/depth/metric3d.py
deleted file mode 100644
index a668ea321e..0000000000
--- a/dimos/models/depth/metric3d.py
+++ /dev/null
@@ -1,187 +0,0 @@
-# Copyright 2025-2026 Dimensional Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-from dataclasses import dataclass, field
-from functools import cached_property
-from typing import Any
-
-import cv2
-import torch
-
-from dimos.models.base import LocalModel, LocalModelConfig
-
-
-@dataclass
-class Metric3DConfig(LocalModelConfig):
- """Configuration for Metric3D depth estimation model."""
-
- camera_intrinsics: list[float] = field(default_factory=lambda: [500.0, 500.0, 320.0, 240.0])
- """Camera intrinsics [fx, fy, cx, cy]."""
-
- gt_depth_scale: float = 256.0
- """Scale factor for ground truth depth."""
-
- device: str = "cuda" if torch.cuda.is_available() else "cpu"
- """Device to run the model on."""
-
-
-class Metric3D(LocalModel):
- default_config = Metric3DConfig
- config: Metric3DConfig
-
- def __init__(self, **kwargs: object) -> None:
- super().__init__(**kwargs)
- self.intrinsic = self.config.camera_intrinsics
- self.intrinsic_scaled: list[float] | None = None
- self.gt_depth_scale = self.config.gt_depth_scale
- self.pad_info: list[int] | None = None
- self.rgb_origin: Any = None
-
- @cached_property
- def _model(self) -> Any:
- model = torch.hub.load( # type: ignore[no-untyped-call]
- "yvanyin/metric3d", "metric3d_vit_small", pretrain=True
- )
- model = model.to(self.device)
- model.eval()
- return model
-
- """
- Input: Single image in RGB format
- Output: Depth map
- """
-
- def update_intrinsic(self, intrinsic): # type: ignore[no-untyped-def]
- """
- Update the intrinsic parameters dynamically.
- Ensure that the input intrinsic is valid.
- """
- if len(intrinsic) != 4:
- raise ValueError("Intrinsic must be a list or tuple with 4 values: [fx, fy, cx, cy]")
- self.intrinsic = intrinsic
- print(f"Intrinsics updated to: {self.intrinsic}")
-
- def infer_depth(self, img, debug: bool = False): # type: ignore[no-untyped-def]
- if debug:
- print(f"Input image: {img}")
- try:
- if isinstance(img, str):
- print(f"Image type string: {type(img)}")
- img_data = cv2.imread(img)
- if img_data is None:
- raise ValueError(f"Failed to load image from {img}")
- self.rgb_origin = img_data[:, :, ::-1]
- else:
- # print(f"Image type not string: {type(img)}, cv2 conversion assumed to be handled. If not, this will throw an error")
- self.rgb_origin = img
- except Exception as e:
- print(f"Error parsing into infer_depth: {e}")
-
- img = self.rescale_input(img, self.rgb_origin) # type: ignore[no-untyped-call]
-
- with torch.no_grad():
- pred_depth, confidence, output_dict = self._model.inference({"input": img})
-
- # Convert to PIL format
- depth_image = self.unpad_transform_depth(pred_depth) # type: ignore[no-untyped-call]
-
- return depth_image.cpu().numpy()
-
- def save_depth(self, pred_depth) -> None: # type: ignore[no-untyped-def]
- # Save the depth map to a file
- pred_depth_np = pred_depth.cpu().numpy()
- output_depth_file = "output_depth_map.png"
- cv2.imwrite(output_depth_file, pred_depth_np)
- print(f"Depth map saved to {output_depth_file}")
-
- # Adjusts input size to fit pretrained ViT model
- def rescale_input(self, rgb, rgb_origin): # type: ignore[no-untyped-def]
- #### ajust input size to fit pretrained model
- # keep ratio resize
- input_size = (616, 1064) # for vit model
- # input_size = (544, 1216) # for convnext model
- h, w = rgb_origin.shape[:2]
- scale = min(input_size[0] / h, input_size[1] / w)
- rgb = cv2.resize(
- rgb_origin, (int(w * scale), int(h * scale)), interpolation=cv2.INTER_LINEAR
- )
- # remember to scale intrinsic, hold depth
- self.intrinsic_scaled = [
- self.intrinsic[0] * scale,
- self.intrinsic[1] * scale,
- self.intrinsic[2] * scale,
- self.intrinsic[3] * scale,
- ]
- # padding to input_size
- padding = [123.675, 116.28, 103.53]
- h, w = rgb.shape[:2]
- pad_h = input_size[0] - h
- pad_w = input_size[1] - w
- pad_h_half = pad_h // 2
- pad_w_half = pad_w // 2
- rgb = cv2.copyMakeBorder(
- rgb,
- pad_h_half,
- pad_h - pad_h_half,
- pad_w_half,
- pad_w - pad_w_half,
- cv2.BORDER_CONSTANT,
- value=padding,
- )
- self.pad_info = [pad_h_half, pad_h - pad_h_half, pad_w_half, pad_w - pad_w_half]
-
- #### normalize
- mean = torch.tensor([123.675, 116.28, 103.53]).float()[:, None, None]
- std = torch.tensor([58.395, 57.12, 57.375]).float()[:, None, None]
- rgb = torch.from_numpy(rgb.transpose((2, 0, 1))).float()
- rgb = torch.div((rgb - mean), std)
- rgb = rgb[None, :, :, :].to(self.device)
- return rgb
-
- def unpad_transform_depth(self, pred_depth): # type: ignore[no-untyped-def]
- # un pad
- pred_depth = pred_depth.squeeze()
- pred_depth = pred_depth[
- self.pad_info[0] : pred_depth.shape[0] - self.pad_info[1], # type: ignore[index]
- self.pad_info[2] : pred_depth.shape[1] - self.pad_info[3], # type: ignore[index]
- ]
-
- # upsample to original size
- pred_depth = torch.nn.functional.interpolate(
- pred_depth[None, None, :, :],
- self.rgb_origin.shape[:2],
- mode="bilinear",
- ).squeeze()
- ###################### canonical camera space ######################
-
- #### de-canonical transform
- canonical_to_real_scale = (
- self.intrinsic_scaled[0] / 1000.0 # type: ignore[index]
- ) # 1000.0 is the focal length of canonical camera
- pred_depth = pred_depth * canonical_to_real_scale # now the depth is metric
- pred_depth = torch.clamp(pred_depth, 0, 1000)
- return pred_depth
-
- def eval_predicted_depth(self, depth_file, pred_depth) -> None: # type: ignore[no-untyped-def]
- if depth_file is not None:
- gt_depth_np = cv2.imread(depth_file, -1)
- if gt_depth_np is None:
- raise ValueError(f"Failed to load depth file from {depth_file}")
- gt_depth_scaled = gt_depth_np / self.gt_depth_scale
- gt_depth = torch.from_numpy(gt_depth_scaled).float().to(self.device)
- assert gt_depth.shape == pred_depth.shape
-
- mask = gt_depth > 1e-8 # type: ignore[operator]
- abs_rel_err = (torch.abs(pred_depth[mask] - gt_depth[mask]) / gt_depth[mask]).mean() # type: ignore[index]
- print("abs_rel_err:", abs_rel_err.item())
diff --git a/dimos/models/depth/test_metric3d.py b/dimos/models/depth/test_metric3d.py
deleted file mode 100644
index 33e39f6a29..0000000000
--- a/dimos/models/depth/test_metric3d.py
+++ /dev/null
@@ -1,102 +0,0 @@
-from contextlib import contextmanager
-
-import numpy as np
-import pytest
-
-from dimos.models.depth.metric3d import Metric3D
-from dimos.msgs.sensor_msgs import Image
-from dimos.utils.data import get_data
-
-
-@contextmanager
-def skip_xformers_unsupported():
- try:
- yield
- except NotImplementedError as e:
- if "memory_efficient_attention" in str(e):
- pytest.skip(f"xformers not supported on this GPU: {e}")
- raise
-
-
-@pytest.fixture
-def sample_intrinsics() -> list[float]:
- """Sample camera intrinsics [fx, fy, cx, cy]."""
- return [500.0, 500.0, 320.0, 240.0]
-
-@pytest.mark.cuda
-@pytest.mark.gpu
-def test_metric3d_init(sample_intrinsics: list[float]) -> None:
- """Test Metric3D initialization."""
- model = Metric3D(camera_intrinsics=sample_intrinsics)
- assert model.config.camera_intrinsics == sample_intrinsics
- assert model.config.gt_depth_scale == 256.0
- assert model.device == "cuda"
-
-
-@pytest.mark.gpu
-def test_metric3d_update_intrinsic(sample_intrinsics: list[float]) -> None:
- """Test updating camera intrinsics."""
- model = Metric3D(camera_intrinsics=sample_intrinsics)
-
- new_intrinsics = [600.0, 600.0, 400.0, 300.0]
- model.update_intrinsic(new_intrinsics)
- assert model.intrinsic == new_intrinsics
-
-@pytest.mark.gpu
-def test_metric3d_update_intrinsic_invalid(sample_intrinsics: list[float]) -> None:
- """Test that invalid intrinsics raise an error."""
- model = Metric3D(camera_intrinsics=sample_intrinsics)
-
- with pytest.raises(ValueError, match="Intrinsic must be a list"):
- model.update_intrinsic([1.0, 2.0]) # Only 2 values
-
-
-@pytest.mark.cuda
-@pytest.mark.gpu
-def test_metric3d_infer_depth(sample_intrinsics: list[float]) -> None:
- """Test depth inference on a sample image."""
- model = Metric3D(camera_intrinsics=sample_intrinsics)
- model.start()
-
- # Load test image
- image = Image.from_file(get_data("cafe.jpg")).to_rgb()
- rgb_array = image.data
-
- # Run inference
- with skip_xformers_unsupported():
- depth_map = model.infer_depth(rgb_array)
-
- # Verify output
- assert isinstance(depth_map, np.ndarray)
- assert depth_map.shape[:2] == rgb_array.shape[:2] # Same spatial dimensions
- assert depth_map.dtype in [np.float32, np.float64]
- assert depth_map.min() >= 0 # Depth should be non-negative
-
- print(f"Depth map shape: {depth_map.shape}")
- print(f"Depth range: [{depth_map.min():.2f}, {depth_map.max():.2f}]")
-
- model.stop()
-
-
-@pytest.mark.cuda
-@pytest.mark.gpu
-def test_metric3d_multiple_inferences(sample_intrinsics: list[float]) -> None:
- """Test multiple depth inferences."""
- model = Metric3D(camera_intrinsics=sample_intrinsics)
- model.start()
-
- image = Image.from_file(get_data("cafe.jpg")).to_rgb()
- rgb_array = image.data
-
- # Run multiple inferences
- depths = []
- for _ in range(3):
- with skip_xformers_unsupported():
- depth = model.infer_depth(rgb_array)
- depths.append(depth)
-
- # Results should be consistent
- for i in range(1, len(depths)):
- assert np.allclose(depths[0], depths[i], rtol=1e-5)
-
- model.stop()
diff --git a/dimos/protocol/mcp/README.md b/dimos/protocol/mcp/README.md
deleted file mode 100644
index 233e852669..0000000000
--- a/dimos/protocol/mcp/README.md
+++ /dev/null
@@ -1,35 +0,0 @@
-# DimOS MCP Server
-
-Expose DimOS robot skills to Claude Code via Model Context Protocol.
-
-## Setup
-
-```bash
-uv sync --extra base --extra unitree
-```
-
-Add to Claude Code (one command):
-```bash
-claude mcp add --transport stdio dimos --scope project -- python -m dimos.protocol.mcp
-```
-
-
-## Usage
-
-**Terminal 1** - Start DimOS:
-```bash
-uv run dimos run unitree-go2-agentic-mcp
-```
-
-**Claude Code** - Use robot skills:
-```
-> move forward 1 meter
-> go to the kitchen
-> tag this location as "desk"
-```
-
-## How It Works
-
-1. `MCPModule` in the blueprint starts a TCP server on port 9990
-2. Claude Code spawns the bridge (`--bridge`) which connects to `localhost:9990`
-3. Skills are exposed as MCP tools (e.g., `relative_move`, `navigate_with_text`)
diff --git a/dimos/protocol/mcp/bridge.py b/dimos/protocol/mcp/bridge.py
deleted file mode 100644
index 0b09997798..0000000000
--- a/dimos/protocol/mcp/bridge.py
+++ /dev/null
@@ -1,53 +0,0 @@
-# Copyright 2026 Dimensional Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-
-"""MCP Bridge - Connects stdio (Claude Code) to TCP (DimOS Agent)."""
-
-import asyncio
-import os
-import sys
-
-DEFAULT_PORT = 9990
-
-
-async def main() -> None:
- port = int(os.environ.get("MCP_PORT", DEFAULT_PORT))
- host = os.environ.get("MCP_HOST", "localhost")
-
- reader, writer = await asyncio.open_connection(host, port)
- sys.stderr.write(f"MCP Bridge connected to {host}:{port}\n")
-
- async def stdin_to_tcp() -> None:
- loop = asyncio.get_event_loop()
- while True:
- line = await loop.run_in_executor(None, sys.stdin.readline)
- if not line:
- break
- writer.write(line.encode())
- await writer.drain()
-
- async def tcp_to_stdout() -> None:
- while True:
- data = await reader.readline()
- if not data:
- break
- sys.stdout.write(data.decode())
- sys.stdout.flush()
-
- await asyncio.gather(stdin_to_tcp(), tcp_to_stdout())
-
-
-if __name__ == "__main__":
- asyncio.run(main())
diff --git a/dimos/protocol/mcp/mcp.py b/dimos/protocol/mcp/mcp.py
deleted file mode 100644
index 78d19c64db..0000000000
--- a/dimos/protocol/mcp/mcp.py
+++ /dev/null
@@ -1,139 +0,0 @@
-# Copyright 2025-2026 Dimensional Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from __future__ import annotations
-
-import asyncio
-import json
-from typing import TYPE_CHECKING, Any
-
-from dimos.core import Module, rpc
-from dimos.core.rpc_client import RpcCall, RPCClient
-
-if TYPE_CHECKING:
- from dimos.core.module import SkillInfo
-
-
-class MCPModule(Module):
- _skills: list[SkillInfo]
- _rpc_calls: dict[str, RpcCall]
-
- def __init__(self, *args: Any, **kwargs: Any) -> None:
- super().__init__(*args, **kwargs)
- self._skills = []
- self._rpc_calls = {}
- self._server: asyncio.AbstractServer | None = None
- self._server_future: object | None = None
-
- @rpc
- def start(self) -> None:
- super().start()
- self._start_server()
-
- @rpc
- def stop(self) -> None:
- if self._server:
- self._server.close()
- loop = self._loop
- assert loop is not None
- asyncio.run_coroutine_threadsafe(self._server.wait_closed(), loop).result()
- self._server = None
- if self._server_future and hasattr(self._server_future, "cancel"):
- self._server_future.cancel()
- super().stop()
-
- @rpc
- def on_system_modules(self, modules: list[RPCClient]) -> None:
- assert self.rpc is not None
- self._skills = [skill for module in modules for skill in (module.get_skills() or [])]
- self._rpc_calls = {
- skill.func_name: RpcCall(None, self.rpc, skill.func_name, skill.class_name, [])
- for skill in self._skills
- }
-
- def _start_server(self, port: int = 9990) -> None:
- async def handle_client(reader, writer) -> None: # type: ignore[no-untyped-def]
- while True:
- if not (data := await reader.readline()):
- break
- response = await self._handle_request(json.loads(data.decode()))
- writer.write(json.dumps(response).encode() + b"\n")
- await writer.drain()
- writer.close()
-
- async def start_server() -> None:
- self._server = await asyncio.start_server(handle_client, "0.0.0.0", port)
- await self._server.serve_forever()
-
- loop = self._loop
- assert loop is not None
- self._server_future = asyncio.run_coroutine_threadsafe(start_server(), loop)
-
- async def _handle_request(self, request: dict[str, Any]) -> dict[str, Any]:
- method = request.get("method", "")
- params = request.get("params", {}) or {}
- req_id = request.get("id")
- if method == "initialize":
- init_result = {
- "protocolVersion": "2024-11-05",
- "capabilities": {"tools": {}},
- "serverInfo": {"name": "dimensional", "version": "1.0.0"},
- }
- return {"jsonrpc": "2.0", "id": req_id, "result": init_result}
- if method == "tools/list":
- tools = []
- for skill in self._skills:
- schema = json.loads(skill.args_schema)
- tools.append(
- {
- "name": skill.func_name,
- "description": schema.get("description", ""),
- "inputSchema": schema,
- }
- )
- return {"jsonrpc": "2.0", "id": req_id, "result": {"tools": tools}}
- if method == "tools/call":
- name = params.get("name")
- args = params.get("arguments") or {}
- if not isinstance(name, str):
- return {
- "jsonrpc": "2.0",
- "id": req_id,
- "error": {"code": -32602, "message": "Missing or invalid tool name"},
- }
- if not isinstance(args, dict):
- args = {}
- rpc_call = self._rpc_calls.get(name)
- if rpc_call is None:
- return {
- "jsonrpc": "2.0",
- "id": req_id,
- "result": {"content": [{"type": "text", "text": "Skill not found"}]},
- }
- try:
- result = await asyncio.get_event_loop().run_in_executor(
- None, lambda: rpc_call(**args)
- )
- text = str(result) if result is not None else "Completed"
- except Exception as e:
- text = f"Error: {e}"
- return {
- "jsonrpc": "2.0",
- "id": req_id,
- "result": {"content": [{"type": "text", "text": text}]},
- }
- return {
- "jsonrpc": "2.0",
- "id": req_id,
- "error": {"code": -32601, "message": f"Unknown: {method}"},
- }
diff --git a/dimos/protocol/pubsub/test_spec.py b/dimos/protocol/pubsub/test_spec.py
index 0bdfa62628..26c1cf0357 100644
--- a/dimos/protocol/pubsub/test_spec.py
+++ b/dimos/protocol/pubsub/test_spec.py
@@ -17,6 +17,7 @@
import asyncio
from collections.abc import Callable, Generator
from contextlib import contextmanager
+import threading
import time
from typing import Any
@@ -149,10 +150,12 @@ def test_store(pubsub_context: Callable[[], Any], topic: Any, values: list[Any])
with pubsub_context() as x:
# Create a list to capture received messages
received_messages: list[Any] = []
+ msg_event = threading.Event()
# Define callback function that stores received messages
def callback(message: Any, _: Any) -> None:
received_messages.append(message)
+ msg_event.set()
# Subscribe to the topic with our callback
x.subscribe(topic, callback)
@@ -160,10 +163,8 @@ def callback(message: Any, _: Any) -> None:
# Publish the first value to the topic
x.publish(topic, values[0])
- # Give Redis time to process the message if needed
- time.sleep(0.1)
+ assert msg_event.wait(timeout=1.0), "Timed out waiting for message"
- print("RECEIVED", received_messages)
# Verify the callback was called with the correct value
assert len(received_messages) == 1
assert received_messages[0] == values[0]
@@ -178,13 +179,17 @@ def test_multiple_subscribers(
# Create lists to capture received messages for each subscriber
received_messages_1: list[Any] = []
received_messages_2: list[Any] = []
+ event_1 = threading.Event()
+ event_2 = threading.Event()
# Define callback functions
def callback_1(message: Any, topic: Any) -> None:
received_messages_1.append(message)
+ event_1.set()
def callback_2(message: Any, topic: Any) -> None:
received_messages_2.append(message)
+ event_2.set()
# Subscribe both callbacks to the same topic
x.subscribe(topic, callback_1)
@@ -193,8 +198,8 @@ def callback_2(message: Any, topic: Any) -> None:
# Publish the first value
x.publish(topic, values[0])
- # Give Redis time to process the message if needed
- time.sleep(0.1)
+ assert event_1.wait(timeout=1.0), "Timed out waiting for subscriber 1"
+ assert event_2.wait(timeout=1.0), "Timed out waiting for subscriber 2"
# Verify both callbacks received the message
assert len(received_messages_1) == 1
@@ -238,21 +243,24 @@ def test_multiple_messages(
with pubsub_context() as x:
# Create a list to capture received messages
received_messages: list[Any] = []
+ all_received = threading.Event()
+
+ # Publish the rest of the values (after the first one used in basic tests)
+ messages_to_send = values[1:] if len(values) > 1 else values
# Define callback function
def callback(message: Any, topic: Any) -> None:
received_messages.append(message)
+ if len(received_messages) >= len(messages_to_send):
+ all_received.set()
# Subscribe to the topic
x.subscribe(topic, callback)
- # Publish the rest of the values (after the first one used in basic tests)
- messages_to_send = values[1:] if len(values) > 1 else values
for msg in messages_to_send:
x.publish(topic, msg)
- # Give Redis time to process the messages if needed
- time.sleep(0.2)
+ assert all_received.wait(timeout=1.0), "Timed out waiting for all messages"
# Verify all messages were received in order
assert len(received_messages) == len(messages_to_send)
diff --git a/dimos/robot/all_blueprints.py b/dimos/robot/all_blueprints.py
index 19d7e7db29..f33a3a8cf5 100644
--- a/dimos/robot/all_blueprints.py
+++ b/dimos/robot/all_blueprints.py
@@ -27,7 +27,9 @@
"coordinator-combined-xarm6": "dimos.control.blueprints:coordinator_combined_xarm6",
"coordinator-dual-mock": "dimos.control.blueprints:coordinator_dual_mock",
"coordinator-dual-xarm": "dimos.control.blueprints:coordinator_dual_xarm",
+ "coordinator-mobile-manip-mock": "dimos.control.blueprints:coordinator_mobile_manip_mock",
"coordinator-mock": "dimos.control.blueprints:coordinator_mock",
+ "coordinator-mock-twist-base": "dimos.control.blueprints:coordinator_mock_twist_base",
"coordinator-piper": "dimos.control.blueprints:coordinator_piper",
"coordinator-piper-xarm": "dimos.control.blueprints:coordinator_piper_xarm",
"coordinator-teleop-dual": "dimos.control.blueprints:coordinator_teleop_dual",
@@ -55,6 +57,8 @@
"mid360-fastlio-voxels": "dimos.hardware.sensors.lidar.fastlio2.fastlio_blueprints:mid360_fastlio_voxels",
"mid360-fastlio-voxels-native": "dimos.hardware.sensors.lidar.fastlio2.fastlio_blueprints:mid360_fastlio_voxels_native",
"phone-go2-teleop": "dimos.teleop.phone.blueprints:phone_go2_teleop",
+ # "sim-basic": "dimos.robot.sim.blueprints.basic.sim_basic:sim_basic",
+ "sim-nav": "dimos.robot.sim.blueprints.nav.sim_nav:sim_nav",
"simple-phone-teleop": "dimos.teleop.phone.blueprints:simple_phone_teleop",
"uintree-g1-primitive-no-nav": "dimos.robot.unitree.g1.blueprints.primitive.uintree_g1_primitive_no_nav:uintree_g1_primitive_no_nav",
"unitree-g1": "dimos.robot.unitree.g1.blueprints.perceptive.unitree_g1:unitree_g1",
@@ -95,7 +99,6 @@
"cost_mapper": "dimos.mapping.costmapper",
"demo_calculator_skill": "dimos.agents.skills.demo_calculator_skill",
"demo_robot": "dimos.agents.skills.demo_robot",
- "depth_module": "dimos.robot.unitree.depth_module",
"detection3d_module": "dimos.perception.detection.module3D",
"detection_db_module": "dimos.perception.detection.moduleDB",
"fastlio2_module": "dimos.hardware.sensors.lidar.fastlio2.module",
@@ -112,6 +115,7 @@
"keyboard_teleop_module": "dimos.teleop.keyboard.keyboard_teleop_module",
"manipulation_module": "dimos.manipulation.manipulation_module",
"mapper": "dimos.robot.unitree.type.map",
+ "mcp_client": "dimos.agents.mcp.mcp_client",
"mid360_module": "dimos.hardware.sensors.lidar.livox.module",
"navigation_skill": "dimos.agents.skills.navigation",
"object_scene_registration_module": "dimos.perception.object_scene_registration",
@@ -125,6 +129,8 @@
"replanning_a_star_planner": "dimos.navigation.replanning_a_star.module",
"rerun_bridge": "dimos.visualization.rerun.bridge",
"ros_nav": "dimos.navigation.rosnav",
+ # "sim_bridge": "dimos.robot.sim.bridge",
+ # "sim_tf": "dimos.robot.sim.tf_module",
"simple_phone_teleop_module": "dimos.teleop.phone.phone_extensions",
"simulation": "dimos.simulation.manipulators.sim_module",
"spatial_memory": "dimos.perception.spatial_perception",
diff --git a/dimos/robot/drone/README.md b/dimos/robot/drone/README.md
index 6e8ceb4d63..100e2deadd 100644
--- a/dimos/robot/drone/README.md
+++ b/dimos/robot/drone/README.md
@@ -126,7 +126,7 @@ DJI Drone β Wireless β DJI Controller β USB β Android Device β WiFi
```
drone.py # Main orchestrator
βββ connection_module.py # MAVLink communication & skills
-βββ camera_module.py # Video processing & depth estimation
+βββ camera_module.py # Video processing
βββ tracking_module.py # Visual servoing & object tracking
βββ mavlink_connection.py # Low-level MAVLink protocol
βββ dji_video_stream.py # GStreamer video capture
@@ -242,13 +242,6 @@ drone.start()
- **ROS/DimOS**: X=Forward, Y=Left, Z=Up
- Automatic conversion handled internally
-### Depth Estimation
-Camera module can generate depth maps using Metric3D:
-```python
-# Depth published to /drone/depth and /drone/pointcloud
-# Requires GPU with 8GB+ VRAM
-```
-
### Foxglove Visualization
Connect Foxglove Studio to `ws://localhost:8765` to see:
- Live video with tracking overlay
diff --git a/dimos/robot/drone/camera_module.py b/dimos/robot/drone/camera_module.py
index 8ba88fd028..248b1ceb6e 100644
--- a/dimos/robot/drone/camera_module.py
+++ b/dimos/robot/drone/camera_module.py
@@ -15,7 +15,7 @@
# Copyright 2025-2026 Dimensional Inc.
-"""Camera module for drone with depth estimation."""
+"""Camera module for drone."""
import threading
import time
@@ -25,9 +25,8 @@
from dimos.core import In, Module, Out, rpc
from dimos.msgs.geometry_msgs import PoseStamped
-from dimos.msgs.sensor_msgs import Image, ImageFormat
+from dimos.msgs.sensor_msgs import Image
from dimos.msgs.std_msgs import Header
-from dimos.perception.common.utils import colorize_depth
from dimos.utils.logging_config import setup_logger
logger = setup_logger()
@@ -35,15 +34,13 @@
class DroneCameraModule(Module):
"""
- Camera module for drone that processes RGB images to generate depth using Metric3D.
+ Camera module for drone
Subscribes to:
- /video: RGB camera images from drone
Publishes:
- /drone/color_image: RGB camera images
- - /drone/depth_image: Depth images from Metric3D
- - /drone/depth_colorized: Colorized depth
- /drone/camera_info: Camera calibration
- /drone/camera_pose: Camera pose from TF
"""
@@ -53,8 +50,6 @@ class DroneCameraModule(Module):
# Outputs
color_image: Out[Image]
- depth_image: Out[Image]
- depth_colorized: Out[Image]
camera_info: Out[CameraInfo]
camera_pose: Out[PoseStamped]
@@ -64,7 +59,6 @@ def __init__(
world_frame_id: str = "world",
camera_frame_id: str = "camera_link",
base_frame_id: str = "base_link",
- gt_depth_scale: float = 2.0,
**kwargs: Any,
) -> None:
"""Initialize drone camera module.
@@ -73,7 +67,6 @@ def __init__(
camera_intrinsics: [fx, fy, cx, cy]
camera_frame_id: TF frame for camera
base_frame_id: TF frame for drone base
- gt_depth_scale: Depth scale factor
"""
super().__init__(**kwargs)
@@ -84,10 +77,6 @@ def __init__(
self.camera_frame_id = camera_frame_id
self.base_frame_id = base_frame_id
self.world_frame_id = world_frame_id
- self.gt_depth_scale = gt_depth_scale
-
- # Metric3D for depth
- self.metric3d: Any = None # Lazy-loaded Metric3D model
# Processing state
self._running = False
@@ -104,7 +93,6 @@ def start(self) -> None:
logger.warning("Camera module already running")
return
- # Start processing thread for depth (which will init Metric3D and handle video)
self._running = True
self._stop_processing.clear()
self._processing_thread = threading.Thread(target=self._processing_loop, daemon=True)
@@ -121,22 +109,9 @@ def _on_video_frame(self, frame: Image) -> None:
# Publish color image immediately
self.color_image.publish(frame)
- # Store for depth processing
self._latest_frame = frame
def _processing_loop(self) -> None:
- """Process depth estimation in background."""
- # Initialize Metric3D in the background thread
- if self.metric3d is None:
- try:
- from dimos.models.depth.metric3d import Metric3D
-
- self.metric3d = Metric3D(camera_intrinsics=self.camera_intrinsics)
- logger.info("Metric3D initialized")
- except Exception as e:
- logger.warning(f"Metric3D not available: {e}")
- self.metric3d = None
-
# Subscribe to video once connection is available
subscribed = False
while not subscribed and not self._stop_processing.is_set():
@@ -151,12 +126,10 @@ def _processing_loop(self) -> None:
logger.debug(f"Waiting for video connection: {e}")
time.sleep(0.1)
- logger.info("Depth processing loop started")
-
_reported_error = False
while not self._stop_processing.is_set():
- if self._latest_frame is not None and self.metric3d is not None:
+ if self._latest_frame is not None:
try:
frame = self._latest_frame
self._latest_frame = None
@@ -164,34 +137,9 @@ def _processing_loop(self) -> None:
# Get numpy array from Image
img_array = frame.data
- # Generate depth
- depth_array = self.metric3d.infer_depth(img_array) / self.gt_depth_scale
-
# Create header
header = Header(self.camera_frame_id)
- # Publish depth
- depth_msg = Image(
- data=depth_array,
- format=ImageFormat.DEPTH,
- frame_id=header.frame_id,
- ts=header.ts,
- )
- self.depth_image.publish(depth_msg)
-
- # Publish colorized depth
- depth_colorized_array = colorize_depth(
- depth_array, max_depth=10.0, overlay_stats=True
- )
- if depth_colorized_array is not None:
- depth_colorized_msg = Image(
- data=depth_colorized_array,
- format=ImageFormat.RGB,
- frame_id=header.frame_id,
- ts=header.ts,
- )
- self.depth_colorized.publish(depth_colorized_msg)
-
# Publish camera info
self._publish_camera_info(header, img_array.shape)
@@ -201,12 +149,10 @@ def _processing_loop(self) -> None:
except Exception as e:
if not _reported_error:
_reported_error = True
- logger.error(f"Error processing depth: {e}")
+ logger.error(f"Error processing frame: {e}")
else:
time.sleep(0.01)
- logger.info("Depth processing loop stopped")
-
def _publish_camera_info(self, header: Header, shape: tuple[int, ...]) -> None:
"""Publish camera calibration info."""
try:
@@ -279,8 +225,4 @@ def stop(self) -> None:
if self._processing_thread and self._processing_thread.is_alive():
self._processing_thread.join(timeout=2.0)
- # Cleanup Metric3D
- if self.metric3d:
- self.metric3d.cleanup()
-
logger.info("Camera module stopped")
diff --git a/dimos/robot/drone/drone.py b/dimos/robot/drone/drone.py
index 8e72d56ed1..6b9500804f 100644
--- a/dimos/robot/drone/drone.py
+++ b/dimos/robot/drone/drone.py
@@ -51,7 +51,7 @@
class Drone(Robot):
- """Generic MAVLink-based drone with video and depth capabilities."""
+ """Generic MAVLink-based drone with video capabilities."""
def __init__(
self,
@@ -164,8 +164,6 @@ def _deploy_camera(self) -> None:
# Configure LCM transports
self.camera.color_image.transport = core.LCMTransport("/drone/color_image", Image)
- self.camera.depth_image.transport = core.LCMTransport("/drone/depth_image", Image)
- self.camera.depth_colorized.transport = core.LCMTransport("/drone/depth_colorized", Image)
self.camera.camera_info.transport = core.LCMTransport("/drone/camera_info", CameraInfo)
self.camera.camera_pose.transport = core.LCMTransport("/drone/camera_pose", PoseStamped)
diff --git a/dimos/models/depth/__init__.py b/dimos/robot/sim/__init__.py
similarity index 100%
rename from dimos/models/depth/__init__.py
rename to dimos/robot/sim/__init__.py
diff --git a/dimos/protocol/mcp/__init__.py b/dimos/robot/sim/blueprints/__init__.py
similarity index 100%
rename from dimos/protocol/mcp/__init__.py
rename to dimos/robot/sim/blueprints/__init__.py
diff --git a/dimos/robot/sim/blueprints/basic/__init__.py b/dimos/robot/sim/blueprints/basic/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/dimos/robot/sim/blueprints/basic/sim_basic.py b/dimos/robot/sim/blueprints/basic/sim_basic.py
new file mode 100644
index 0000000000..928fac8a77
--- /dev/null
+++ b/dimos/robot/sim/blueprints/basic/sim_basic.py
@@ -0,0 +1,93 @@
+# Copyright 2026 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Basic DimSim blueprint — connection + visualization."""
+
+import platform
+
+from dimos.constants import DEFAULT_CAPACITY_COLOR_IMAGE
+from dimos.core.blueprints import autoconnect
+from dimos.core.global_config import global_config
+from dimos.core.transport import pSHMTransport
+from dimos.msgs.sensor_msgs import Image
+from dimos.protocol.pubsub.impl.lcmpubsub import LCM
+from dimos.robot.sim.bridge import sim_bridge
+from dimos.robot.sim.tf_module import sim_tf
+from dimos.web.websocket_vis.websocket_vis_module import websocket_vis
+
+_mac_transports: dict[tuple[str, type], pSHMTransport[Image]] = {
+ ("color_image", Image): pSHMTransport(
+ "color_image", default_capacity=DEFAULT_CAPACITY_COLOR_IMAGE
+ ),
+}
+
+_transports_base = (
+ autoconnect() if platform.system() == "Linux" else autoconnect().transports(_mac_transports)
+)
+
+rerun_config = {
+ "pubsubs": [LCM(autoconf=True)],
+ "visual_override": {
+ "world/camera_info": lambda camera_info: camera_info.to_rerun(
+ image_topic="/world/color_image",
+ optical_frame="camera_optical",
+ ),
+ "world/global_map": lambda grid: grid.to_rerun(voxel_size=0.1, mode="boxes"),
+ "world/navigation_costmap": lambda grid: grid.to_rerun(
+ colormap="Accent",
+ z_offset=0.015,
+ opacity=0.2,
+ background="#484981",
+ ),
+ "world/pointcloud": None,
+ "world/depth_image": None,
+ },
+ "static": {
+ "world/tf/base_link": lambda rr: [
+ rr.Boxes3D(
+ half_sizes=[0.3, 0.15, 0.12],
+ colors=[(0, 180, 255)],
+ ),
+ rr.Transform3D(parent_frame="tf#/base_link"),
+ ]
+ },
+}
+
+match global_config.viewer_backend:
+ case "foxglove":
+ from dimos.robot.foxglove_bridge import foxglove_bridge
+
+ with_vis = autoconnect(
+ _transports_base,
+ foxglove_bridge(shm_channels=["/color_image#sensor_msgs.Image"]),
+ )
+ case "rerun":
+ from dimos.visualization.rerun.bridge import rerun_bridge
+
+ with_vis = autoconnect(_transports_base, rerun_bridge(**rerun_config))
+ case "rerun-web":
+ from dimos.visualization.rerun.bridge import rerun_bridge
+
+ with_vis = autoconnect(_transports_base, rerun_bridge(viewer_mode="web", **rerun_config))
+ case _:
+ with_vis = _transports_base
+
+sim_basic = autoconnect(
+ with_vis,
+ sim_bridge(),
+ sim_tf(),
+ websocket_vis(),
+).global_config(n_dask_workers=4, robot_model="dimsim")
+
+__all__ = ["sim_basic"]
diff --git a/dimos/robot/sim/blueprints/nav/__init__.py b/dimos/robot/sim/blueprints/nav/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/dimos/robot/sim/blueprints/nav/sim_nav.py b/dimos/robot/sim/blueprints/nav/sim_nav.py
new file mode 100644
index 0000000000..517611d52f
--- /dev/null
+++ b/dimos/robot/sim/blueprints/nav/sim_nav.py
@@ -0,0 +1,32 @@
+# Copyright 2026 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""DimSim navigation blueprint — basic + mapping + planning + exploration."""
+
+from dimos.core.blueprints import autoconnect
+from dimos.mapping.costmapper import cost_mapper
+from dimos.mapping.voxels import voxel_mapper
+from dimos.navigation.frontier_exploration import wavefront_frontier_explorer
+from dimos.navigation.replanning_a_star.module import replanning_a_star_planner
+from dimos.robot.sim.blueprints.basic.sim_basic import sim_basic
+
+sim_nav = autoconnect(
+ sim_basic,
+ voxel_mapper(voxel_size=0.1),
+ cost_mapper(),
+ replanning_a_star_planner(),
+ wavefront_frontier_explorer(),
+).global_config(n_dask_workers=6, robot_model="dimsim")
+
+__all__ = ["sim_nav"]
diff --git a/dimos/robot/sim/bridge.py b/dimos/robot/sim/bridge.py
new file mode 100644
index 0000000000..157aaf92ac
--- /dev/null
+++ b/dimos/robot/sim/bridge.py
@@ -0,0 +1,155 @@
+# Copyright 2026 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""NativeModule wrapper for the DimSim bridge subprocess.
+
+Launches the DimSim bridge (Deno CLI) as a managed subprocess. The bridge
+publishes sensor data (odom, lidar, images) directly to LCM — no Python
+decode/re-encode hop. Python only handles lifecycle and TF (via DimSimTF).
+
+Usage::
+
+ from dimos.robot.sim.bridge import sim_bridge
+ from dimos.robot.sim.tf_module import sim_tf
+ from dimos.core.blueprints import autoconnect
+
+ autoconnect(sim_bridge(), sim_tf(), some_consumer()).build().loop()
+"""
+
+from __future__ import annotations
+
+from dataclasses import dataclass, field
+from pathlib import Path
+import shutil
+from typing import TYPE_CHECKING
+
+from dimos import spec
+from dimos.core.native_module import NativeModule, NativeModuleConfig
+from dimos.utils.logging_config import setup_logger
+
+if TYPE_CHECKING:
+ from dimos.core import In, Out
+ from dimos.msgs.geometry_msgs import PoseStamped, Twist
+ from dimos.msgs.sensor_msgs import CameraInfo, Image, PointCloud2
+
+logger = setup_logger()
+
+
+def _find_cli_script() -> Path | None:
+ """Auto-detect DimSim/dimos-cli/cli.ts relative to this repo."""
+ repo_root = Path(__file__).resolve().parents[4] # dimos/dimos/robot/sim -> repo
+ candidate = repo_root / "DimSim" / "dimos-cli" / "cli.ts"
+ return candidate if candidate.exists() else None
+
+
+def _find_deno() -> str:
+ """Find the deno binary."""
+ return shutil.which("deno") or str(Path.home() / ".deno" / "bin" / "deno")
+
+
+@dataclass(kw_only=True)
+class DimSimBridgeConfig(NativeModuleConfig):
+ """Configuration for the DimSim bridge subprocess."""
+
+    # Set to deno binary — resolved in _resolve_paths().
+ executable: str = "deno"
+ build_command: str | None = None
+ cwd: str | None = None
+
+ scene: str = "apt"
+ port: int = 8090
+ cli_script: str | None = None
+
+ # These fields are handled via extra_args, not to_cli_args().
+ cli_exclude: frozenset[str] = frozenset({"scene", "port", "cli_script"})
+
+    # Populated by _resolve_paths() — deno run args + dev subcommand + scene/port.
+ extra_args: list[str] = field(default_factory=list)
+
+
+class DimSimBridge(NativeModule, spec.Camera, spec.Pointcloud):
+ """NativeModule that manages the DimSim bridge subprocess.
+
+ The bridge (Deno process) handles Browser-LCM translation and publishes
+ sensor data directly to LCM. Ports declared here exist for blueprint
+ wiring / autoconnect but data flows through LCM, not Python.
+ """
+
+ config: DimSimBridgeConfig
+ default_config = DimSimBridgeConfig
+
+ # Sensor outputs (bridge publishes these directly to LCM)
+ odom: Out[PoseStamped]
+ color_image: Out[Image]
+ depth_image: Out[Image]
+ lidar: Out[PointCloud2]
+ pointcloud: Out[PointCloud2]
+ camera_info: Out[CameraInfo]
+
+ # Control input (consumers publish cmd_vel to LCM, bridge reads it)
+ cmd_vel: In[Twist]
+
+ def _resolve_paths(self) -> None:
+ """Resolve executable and build extra_args.
+
+ Prefers globally installed ``dimsim`` CLI (from JSR). Falls back to
+ running the local ``DimSim/dimos-cli/cli.ts`` via Deno for development.
+ """
+ dev_args = ["dev", "--scene", self.config.scene, "--port", str(self.config.port)]
+
+ # 1. Prefer globally installed dimsim CLI (deno install jsr:@antim/dimsim)
+ global_dimsim = shutil.which("dimsim")
+ if global_dimsim:
+ logger.info(f"Using global dimsim CLI: {global_dimsim}")
+ self.config.executable = global_dimsim
+ self.config.extra_args = dev_args
+ self.config.cwd = None
+ return
+
+ # 2. Fall back to local deno + cli.ts (development mode)
+ script = self.config.cli_script
+ if script and Path(script).exists():
+ cli_ts = str(Path(script).resolve())
+ else:
+ found = _find_cli_script()
+ if found:
+ cli_ts = str(found)
+ else:
+ raise FileNotFoundError(
+ "Cannot find DimSim. Install globally with:\n"
+ " deno install -gAf --unstable-net jsr:@antim/dimsim\n"
+ " dimsim setup && dimsim scene install apt"
+ )
+
+ self.config.executable = _find_deno()
+ self.config.extra_args = [
+ "run",
+ "--allow-all",
+ "--unstable-net",
+ cli_ts,
+ *dev_args,
+ ]
+ self.config.cwd = None
+
+ def _maybe_build(self) -> None:
+ """No build step needed for DimSim bridge."""
+
+ def _collect_topics(self) -> dict[str, str]:
+ """Bridge hardcodes LCM channel names β no topic args needed."""
+ return {}
+
+
+sim_bridge = DimSimBridge.blueprint
+
+__all__ = ["DimSimBridge", "DimSimBridgeConfig", "sim_bridge"]
diff --git a/dimos/robot/sim/tf_module.py b/dimos/robot/sim/tf_module.py
new file mode 100644
index 0000000000..9561140e57
--- /dev/null
+++ b/dimos/robot/sim/tf_module.py
@@ -0,0 +1,178 @@
+# Copyright 2026 Dimensional Inc.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Lightweight TF publisher for DimSim.
+
+Subscribes to odometry from the DimSim bridge (via LCM, wired by autoconnect)
+and publishes the transform chain: world -> base_link -> {camera_link ->
+camera_optical, lidar_link}. Also publishes CameraInfo at 1 Hz, forwards
+cmd_vel to the bridge, and exposes a ``move()`` RPC.
+
+This module replaces the TF / camera_info / cmd_vel parts of the old
+DimSimConnection while the NativeModule bridge handles sensor data directly.
+"""
+
+from __future__ import annotations
+
+import math
+from threading import Thread
+import time
+
+from dimos.core import In, Module, Out, rpc
+from dimos.msgs.geometry_msgs import (
+ PoseStamped,
+ Quaternion,
+ Transform,
+ Twist,
+ Vector3,
+)
+from dimos.msgs.sensor_msgs import CameraInfo
+from dimos.utils.logging_config import setup_logger
+
+logger = setup_logger()
+
+# DimSim captures at 960x432 with 80-degree horizontal FOV.
+_DIMSIM_WIDTH = 960
+_DIMSIM_HEIGHT = 432
+_DIMSIM_FOV_DEG = 80
+
+
+def _camera_info_static() -> CameraInfo:
+ """Build CameraInfo for DimSim's virtual camera."""
+ fov_rad = math.radians(_DIMSIM_FOV_DEG)
+ fx = (_DIMSIM_WIDTH / 2) / math.tan(fov_rad / 2)
+ fy = fx # square pixels
+ cx = _DIMSIM_WIDTH / 2.0
+ cy = _DIMSIM_HEIGHT / 2.0
+
+ return CameraInfo(
+ frame_id="camera_optical",
+ height=_DIMSIM_HEIGHT,
+ width=_DIMSIM_WIDTH,
+ distortion_model="plumb_bob",
+ D=[0.0, 0.0, 0.0, 0.0, 0.0],
+ K=[fx, 0.0, cx, 0.0, fy, cy, 0.0, 0.0, 1.0],
+ R=[1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0],
+ P=[fx, 0.0, cx, 0.0, 0.0, fy, cy, 0.0, 0.0, 0.0, 1.0, 0.0],
+ binning_x=0,
+ binning_y=0,
+ )
+
+
+class DimSimTF(Module):
+ """Lightweight TF publisher for the DimSim simulator.
+
+ Wired by autoconnect to receive odom from the bridge's LCM output.
+ Publishes TF transforms and camera intrinsics. Exposes ``move()`` RPC
+ for sending cmd_vel to the bridge.
+ """
+
+    # Odom input — autoconnect wires this to DimSimBridge.odom via LCM
+ odom: In[PoseStamped]
+
+ # Outputs
+ camera_info: Out[CameraInfo]
+ cmd_vel: Out[Twist]
+
+ _camera_info_thread: Thread | None = None
+ _latest_odom: PoseStamped | None = None
+ _odom_last_ts: float = 0.0
+ _odom_count: int = 0
+
+ @classmethod
+ def _odom_to_tf(cls, odom: PoseStamped) -> list[Transform]:
+ """Build transform chain from odometry pose.
+
+ Transform tree: world -> base_link -> {camera_link -> camera_optical, lidar_link}
+ """
+ camera_link = Transform(
+ translation=Vector3(0.3, 0.0, 0.0), # camera 30cm forward
+ rotation=Quaternion(0.0, 0.0, 0.0, 1.0),
+ frame_id="base_link",
+ child_frame_id="camera_link",
+ ts=odom.ts,
+ )
+
+ camera_optical = Transform(
+ translation=Vector3(0.0, 0.0, 0.0),
+ rotation=Quaternion(-0.5, 0.5, -0.5, 0.5),
+ frame_id="camera_link",
+ child_frame_id="camera_optical",
+ ts=odom.ts,
+ )
+
+ lidar_link = Transform(
+ translation=Vector3(0.0, 0.0, 0.0),
+ rotation=Quaternion(0.0, 0.0, 0.0, 1.0),
+ frame_id="base_link",
+ child_frame_id="lidar_link",
+ ts=odom.ts,
+ )
+
+ return [
+ Transform.from_pose("base_link", odom),
+ camera_link,
+ camera_optical,
+ lidar_link,
+ ]
+
+ def _on_odom(self, pose: PoseStamped) -> None:
+ """Handle incoming odometry β publish TF transforms."""
+ # Drop out-of-order messages (UDP multicast doesn't guarantee ordering)
+ if pose.ts <= self._odom_last_ts:
+ return
+ self._odom_last_ts = pose.ts
+ self._latest_odom = pose
+ self._odom_count += 1
+
+ transforms = self._odom_to_tf(pose)
+ self.tf.publish(*transforms)
+
+ def _publish_camera_info_loop(self) -> None:
+ """Publish camera intrinsics at 1 Hz."""
+ while self._camera_info_thread is not None:
+ self.camera_info.publish(_camera_info_static())
+ time.sleep(1.0)
+
+ @rpc
+ def start(self) -> None:
+ super().start()
+
+ from reactivex.disposable import Disposable
+
+ self._disposables.add(Disposable(self.odom.subscribe(self._on_odom)))
+
+ self._camera_info_thread = Thread(target=self._publish_camera_info_loop, daemon=True)
+ self._camera_info_thread.start()
+
+ logger.info("DimSimTF started β listening for odom, publishing TF + camera_info")
+
+ @rpc
+ def stop(self) -> None:
+ thread = self._camera_info_thread
+ self._camera_info_thread = None
+ if thread and thread.is_alive():
+ thread.join(timeout=1.0)
+ super().stop()
+
+ @rpc
+ def move(self, twist: Twist, duration: float = 0.0) -> bool:
+ """Send movement command to the simulator via cmd_vel."""
+ self.cmd_vel.publish(twist)
+ return True
+
+
+sim_tf = DimSimTF.blueprint
+
+__all__ = ["DimSimTF", "sim_tf"]
diff --git a/dimos/robot/unitree/depth_module.py b/dimos/robot/unitree/depth_module.py
deleted file mode 100644
index 07f065caea..0000000000
--- a/dimos/robot/unitree/depth_module.py
+++ /dev/null
@@ -1,243 +0,0 @@
-#!/usr/bin/env python3
-
-# Copyright 2025-2026 Dimensional Inc.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import threading
-import time
-
-from dimos_lcm.sensor_msgs import CameraInfo
-import numpy as np
-
-from dimos.core import In, Module, Out, rpc
-from dimos.core.global_config import GlobalConfig
-from dimos.msgs.sensor_msgs import Image, ImageFormat
-from dimos.utils.logging_config import setup_logger
-
-logger = setup_logger()
-
-
-class DepthModule(Module):
- """
- Depth module for Unitree Go2 that processes RGB images to generate depth using Metric3D.
-
- Subscribes to:
- - /go2/color_image: RGB camera images from Unitree
- - /go2/camera_info: Camera calibration information
-
- Publishes:
- - /go2/depth_image: Depth images generated by Metric3D
- """
-
- # LCM inputs
- color_image: In[Image]
- camera_info: In[CameraInfo]
-
- # LCM outputs
- depth_image: Out[Image]
-
- def __init__( # type: ignore[no-untyped-def]
- self,
- gt_depth_scale: float = 0.5,
- cfg: GlobalConfig | None = None,
- **kwargs,
- ) -> None:
- """
- Initialize Depth Module.
-
- Args:
- gt_depth_scale: Ground truth depth scaling factor
- """
- super().__init__(**kwargs)
-
- self.camera_intrinsics = None
- self.gt_depth_scale = gt_depth_scale
- self.metric3d = None
- self._camera_info_received = False
-
- # Processing state
- self._running = False
- self._latest_frame = None
- self._last_image = None
- self._last_timestamp = None
- self._last_depth = None
- self._cannot_process_depth = False
-
- # Threading
- self._processing_thread: threading.Thread | None = None
- self._stop_processing = threading.Event()
-
- if cfg:
- if cfg.simulation:
- self.gt_depth_scale = 1.0
-
- @rpc
- def start(self) -> None:
- super().start()
-
- if self._running:
- logger.warning("Camera module already running")
- return
-
- # Set running flag before starting
- self._running = True
-
- # Subscribe to video and camera info inputs
- self.color_image.subscribe(self._on_video)
- self.camera_info.subscribe(self._on_camera_info)
-
- # Start processing thread
- self._start_processing_thread()
-
- logger.info("Depth module started")
-
- @rpc
- def stop(self) -> None:
- if not self._running:
- return
-
- self._running = False
- self._stop_processing.set()
-
- # Wait for thread to finish
- if self._processing_thread and self._processing_thread.is_alive():
- self._processing_thread.join(timeout=2.0)
-
- super().stop()
-
- def _on_camera_info(self, msg: CameraInfo) -> None:
- """Process camera info to extract intrinsics."""
- if self.metric3d is not None:
- return # Already initialized
-
- try:
- # Extract intrinsics from camera matrix K
- K = msg.K
- fx = K[0]
- fy = K[4]
- cx = K[2]
- cy = K[5]
-
- self.camera_intrinsics = [fx, fy, cx, cy] # type: ignore[assignment]
-
- # Initialize Metric3D with camera intrinsics
- from dimos.models.depth.metric3d import Metric3D
-
- self.metric3d = Metric3D(camera_intrinsics=self.camera_intrinsics) # type: ignore[assignment]
- self._camera_info_received = True
-
- logger.info(
- f"Initialized Metric3D with intrinsics from camera_info: {self.camera_intrinsics}"
- )
-
- except Exception as e:
- logger.error(f"Error processing camera info: {e}")
-
- def _on_video(self, msg: Image) -> None:
- """Store latest video frame for processing."""
- if not self._running:
- return
-
- # Simply store the latest frame - processing happens in main loop
- self._latest_frame = msg # type: ignore[assignment]
- logger.debug(
- f"Received video frame: format={msg.format}, shape={msg.data.shape if hasattr(msg.data, 'shape') else 'unknown'}"
- )
-
- def _start_processing_thread(self) -> None:
- """Start the processing thread."""
- self._stop_processing.clear()
- self._processing_thread = threading.Thread(target=self._main_processing_loop, daemon=True)
- self._processing_thread.start()
- logger.info("Started depth processing thread")
-
- def _main_processing_loop(self) -> None:
- """Main processing loop that continuously processes latest frames."""
- logger.info("Starting main processing loop")
-
- while not self._stop_processing.is_set():
- # Process latest frame if available
- if self._latest_frame is not None:
- try:
- msg = self._latest_frame
- self._latest_frame = None # Clear to avoid reprocessing
- # Store for publishing
- self._last_image = msg.data
- self._last_timestamp = msg.ts if msg.ts else time.time()
- # Process depth
- self._process_depth(self._last_image)
-
- except Exception as e:
- logger.error(f"Error in main processing loop: {e}", exc_info=True)
- else:
- # Small sleep to avoid busy waiting
- time.sleep(0.001)
-
- logger.info("Main processing loop stopped")
-
- def _process_depth(self, img_array: np.ndarray) -> None: # type: ignore[type-arg]
- """Process depth estimation using Metric3D."""
- if self._cannot_process_depth:
- self._last_depth = None
- return
-
- # Wait for camera info to initialize Metric3D
- if self.metric3d is None:
- logger.debug("Waiting for camera_info to initialize Metric3D")
- return
-
- try:
- logger.debug(f"Processing depth for image shape: {img_array.shape}")
-
- # Generate depth map
- depth_array = self.metric3d.infer_depth(img_array) * self.gt_depth_scale
-
- self._last_depth = depth_array
- logger.debug(f"Generated depth map shape: {depth_array.shape}")
-
- self._publish_depth()
-
- except Exception as e:
- logger.error(f"Error processing depth: {e}")
- self._cannot_process_depth = True
-
- def _publish_depth(self) -> None:
- """Publish depth image."""
- if not self._running:
- return
-
- try:
- # Publish depth image
- if self._last_depth is not None:
- # Convert depth to uint16 (millimeters) for more efficient storage
- # Clamp to valid range [0, 65.535] meters before converting
- depth_clamped = np.clip(self._last_depth, 0, 65.535)
- depth_uint16 = (depth_clamped * 1000).astype(np.uint16)
- depth_msg = Image(
- data=depth_uint16,
- format=ImageFormat.DEPTH16, # Use DEPTH16 format for uint16 depth
- frame_id="camera_link",
- ts=self._last_timestamp,
- )
- self.depth_image.publish(depth_msg)
- logger.debug(f"Published depth image (uint16): shape={depth_uint16.shape}")
-
- except Exception as e:
- logger.error(f"Error publishing depth data: {e}", exc_info=True)
-
-
-depth_module = DepthModule.blueprint
-
-
-__all__ = ["DepthModule", "depth_module"]
diff --git a/dimos/robot/unitree/go2/blueprints/agentic/unitree_go2_agentic_mcp.py b/dimos/robot/unitree/go2/blueprints/agentic/unitree_go2_agentic_mcp.py
index bbc3e4c216..e75b31e511 100644
--- a/dimos/robot/unitree/go2/blueprints/agentic/unitree_go2_agentic_mcp.py
+++ b/dimos/robot/unitree/go2/blueprints/agentic/unitree_go2_agentic_mcp.py
@@ -13,13 +13,17 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+from dimos.agents.mcp.mcp_client import mcp_client
+from dimos.agents.mcp.mcp_server import McpServer
from dimos.core.blueprints import autoconnect
-from dimos.protocol.mcp.mcp import MCPModule
-from dimos.robot.unitree.go2.blueprints.agentic.unitree_go2_agentic import unitree_go2_agentic
+from dimos.robot.unitree.go2.blueprints.agentic._common_agentic import _common_agentic
+from dimos.robot.unitree.go2.blueprints.smart.unitree_go2_spatial import unitree_go2_spatial
unitree_go2_agentic_mcp = autoconnect(
- unitree_go2_agentic,
- MCPModule.blueprint(),
+ unitree_go2_spatial,
+ McpServer.blueprint(),
+ mcp_client(),
+ _common_agentic,
)
__all__ = ["unitree_go2_agentic_mcp"]
diff --git a/dimos/robot/unitree_webrtc/__init__.py b/dimos/robot/unitree_webrtc/__init__.py
index 4524bba226..451aa53128 100644
--- a/dimos/robot/unitree_webrtc/__init__.py
+++ b/dimos/robot/unitree_webrtc/__init__.py
@@ -20,7 +20,6 @@
_ALIAS_MODULES = {
"demo_error_on_name_conflicts": "dimos.robot.unitree.demo_error_on_name_conflicts",
- "depth_module": "dimos.robot.unitree.depth_module",
"keyboard_teleop": "dimos.robot.unitree.keyboard_teleop",
"mujoco_connection": "dimos.robot.unitree.mujoco_connection",
"type": "dimos.robot.unitree.type",
diff --git a/dimos/types/manipulation.py b/dimos/types/manipulation.py
index 507b9e9b85..76ad7979f2 100644
--- a/dimos/types/manipulation.py
+++ b/dimos/types/manipulation.py
@@ -80,7 +80,7 @@ class ObjectData(TypedDict, total=False):
# Basic detection information
object_id: int # Unique ID for the object
bbox: list[float] # Bounding box [x1, y1, x2, y2]
- depth: float # Depth in meters from Metric3d
+ depth: float # Depth in meters
confidence: float # Detection confidence
class_id: int # Class ID from the detector
label: str # Semantic label (e.g., 'cup', 'table')
diff --git a/dimos/utils/docs/doclinks.py b/dimos/utils/docs/doclinks.py
index 67d5897b28..2cf5d1702f 100644
--- a/dimos/utils/docs/doclinks.py
+++ b/dimos/utils/docs/doclinks.py
@@ -30,6 +30,7 @@
import re
import subprocess
import sys
+import time
from typing import Any
@@ -78,7 +79,7 @@ def get_git_tracked_files(root: Path) -> list[Path]:
return []
-def build_file_index(root: Path) -> dict[str, list[Path]]:
+def build_file_index(root: Path, tracked_files: list[Path] | None = None) -> dict[str, list[Path]]:
"""
Build an index mapping filename suffixes to full paths.
@@ -89,7 +90,8 @@ def build_file_index(root: Path) -> dict[str, list[Path]]:
- dimos/protocol/service/spec.py
"""
index: dict[str, list[Path]] = defaultdict(list)
- tracked_files = get_git_tracked_files(root)
+ if tracked_files is None:
+ tracked_files = get_git_tracked_files(root)
for rel_path in tracked_files:
parts = rel_path.parts
@@ -102,7 +104,7 @@ def build_file_index(root: Path) -> dict[str, list[Path]]:
return index
-def build_doc_index(root: Path) -> dict[str, list[Path]]:
+def build_doc_index(root: Path, tracked_files: list[Path] | None = None) -> dict[str, list[Path]]:
"""
Build an index mapping lowercase doc names to .md file paths.
@@ -113,7 +115,8 @@ def build_doc_index(root: Path) -> dict[str, list[Path]]:
- "modules" -> [Path("docs/modules/index.md")] (if modules/index.md exists)
"""
index: dict[str, list[Path]] = defaultdict(list)
- tracked_files = get_git_tracked_files(root)
+ if tracked_files is None:
+ tracked_files = get_git_tracked_files(root)
for rel_path in tracked_files:
if rel_path.suffix != ".md":
@@ -144,11 +147,78 @@ def find_symbol_line(file_path: Path, symbol: str) -> int | None:
return None
+# Extensions that indicate a backticked term is a filename, not a symbol
+_FILE_EXTENSIONS = frozenset(
+ (
+ ".py",
+ ".md",
+ ".ts",
+ ".js",
+ ".go",
+ ".rs",
+ ".c",
+ ".h",
+ ".cpp",
+ ".hpp",
+ ".java",
+ ".rb",
+ ".yaml",
+ ".yml",
+ ".json",
+ ".toml",
+ ".sh",
+ ".lua",
+ )
+)
+
+
def extract_other_backticks(line: str, file_ref: str) -> list[str]:
"""Extract other backticked terms from a line, excluding the file reference."""
pattern = r"`([^`]+)`"
matches = re.findall(pattern, line)
- return [m for m in matches if m != file_ref and not m.endswith(".py") and "/" not in m]
+ return [
+ m
+ for m in matches
+ if m != file_ref and "/" not in m and not any(m.endswith(ext) for ext in _FILE_EXTENSIONS)
+ ]
+
+
+def score_path_similarity(candidate: Path, original_path: str) -> int:
+ """Score how well a candidate matches the original link's path.
+
+ Counts common directory names plus a bonus for matching filename.
+ Higher = better match.
+ """
+ orig = Path(original_path)
+ orig_dirs = set(orig.parent.parts)
+ cand_dirs = set(candidate.parent.parts)
+ score = len(orig_dirs & cand_dirs)
+ if candidate.name == orig.name:
+ score += 1
+ return score
+
+
+def pick_best_candidate(candidates: list[Path], original_path: str) -> Path | None:
+ """Pick the best candidate by path similarity. Returns None if tied."""
+ if not candidates:
+ return None
+ if len(candidates) == 1:
+ return candidates[0]
+ scored = sorted(candidates, key=lambda c: score_path_similarity(c, original_path), reverse=True)
+ top = score_path_similarity(scored[0], original_path)
+ second = score_path_similarity(scored[1], original_path)
+ if top > second:
+ return scored[0]
+ return None # Ambiguous tie
+
+
+def resolve_candidates(candidates: list[Path], original_path: str) -> Path | None:
+ """Resolve candidates to a single path. Returns None if 0 or ambiguous."""
+ if len(candidates) == 1:
+ return candidates[0]
+ if len(candidates) > 1:
+ return pick_best_candidate(candidates, original_path)
+ return None
def generate_link(
@@ -245,14 +315,32 @@ def process_markdown(
Returns (new_content, changes, errors).
"""
- changes = []
- errors = []
+ changes: list[str] = []
+ errors: list[str] = []
- # Pattern 1: [`filename`](link) - code file links
+ # Pattern 1: [`filename`](link) - backtick code links with symbol auto-linking
code_pattern = r"\[`([^`]+)`\]\(([^)]*)\)"
- # Pattern 2: [Text](.md) - doc file links
- doc_pattern = r"\[([^\]]+)\]\(\.md\)"
+ # Pattern 2: [Text](url) - all non-backtick, non-image links
+    # (?<!!) excludes image links; (?<!`) style backtick links are handled by code_pattern
+    link_pattern = r"(?<!!)\[([^\]`][^\]]*)\]\(([^)]+)\)"
+
+    def _search_fallback(link_path: str, original_ref: str) -> tuple[Path | None, list[Path]]:
+ """Search for a broken link's target by name in doc_index or file_index."""
+ path = Path(link_path)
+ if path.suffix == ".md":
+ stem = path.stem.lower()
+ if stem == "index":
+ stem = path.parent.name.lower()
+ candidates = doc_index.get(stem, []) if doc_index else []
+ elif path.suffix:
+            # Has a file extension — search file_index by filename
+ candidates = file_index.get(path.name, [])
+ else:
+            # No extension (likely a directory) — no fallback search
+ return None, []
+ return resolve_candidates(candidates, original_ref), candidates
def replace_code_match(match: re.Match[str]) -> str:
file_ref = match.group(1)
@@ -267,18 +355,19 @@ def replace_code_match(match: re.Match[str]) -> str:
if "." not in file_ref and "/" not in file_ref:
return full_match
- # Look up in index
+ # Look up in index, with disambiguation
candidates = file_index.get(file_ref, [])
+ resolved_path = resolve_candidates(candidates, file_ref)
- if len(candidates) == 0:
- errors.append(f"No file matching '{file_ref}' found in codebase")
- return full_match
- elif len(candidates) > 1:
- errors.append(f"'{file_ref}' matches multiple files: {[str(c) for c in candidates]}")
+ if resolved_path is None:
+ if len(candidates) > 1:
+ errors.append(
+ f"'{file_ref}' matches multiple files: {[str(c) for c in candidates]}"
+ )
+ else:
+ errors.append(f"No file matching '{file_ref}' found in codebase")
return full_match
- resolved_path = candidates[0]
-
# Determine line fragment
line_fragment = ""
@@ -313,33 +402,107 @@ def replace_code_match(match: re.Match[str]) -> str:
return new_match
- def replace_doc_match(match: re.Match[str]) -> str:
- """Replace [Text](.md) with resolved doc path."""
- if doc_index is None:
- return match.group(0)
-
+ def replace_link_match(match: re.Match[str]) -> str:
+ """Handle all non-backtick links: doc placeholders, path validation."""
link_text = match.group(1)
+ raw_link = match.group(2)
full_match = match.group(0)
- lookup_key = link_text.lower()
- # Look up in doc index
- candidates = doc_index.get(lookup_key, [])
+ # Skip URLs
+ if raw_link.startswith(("http://", "https://", "mailto:")):
+ return full_match
- if len(candidates) == 0:
- errors.append(f"No doc matching '{link_text}' found")
+ # Skip anchor-only links
+ if raw_link.startswith("#"):
return full_match
- elif len(candidates) > 1:
- errors.append(f"'{link_text}' matches multiple docs: {[str(c) for c in candidates]}")
+
+ # Extract fragment if present
+ fragment = ""
+ link_path = raw_link
+ if "#" in raw_link:
+ link_path, frag = raw_link.split("#", 1)
+ fragment = "#" + frag
+
+        # .md placeholder: [Text](.md) — doc_index lookup by link text
+ if link_path == ".md":
+ if doc_index is None:
+ return full_match
+ lookup_key = link_text.lower()
+ candidates = doc_index.get(lookup_key, [])
+ resolved = resolve_candidates(candidates, lookup_key)
+ if resolved is not None:
+ new_link = generate_link(
+ resolved, root, doc_path, link_mode, github_url, github_ref, fragment
+ )
+ result = f"[{link_text}]({new_link})"
+ if result != full_match:
+ changes.append(f" {link_text}: .md -> {new_link}")
+ return result
+ if len(candidates) > 1:
+ errors.append(
+ f"'{link_text}' matches multiple docs: {[str(c) for c in candidates]}"
+ )
+ else:
+ errors.append(f"No doc matching '{link_text}' found")
return full_match
- resolved_path = candidates[0]
- new_link = generate_link(resolved_path, root, doc_path, link_mode, github_url, github_ref)
- new_match = f"[{link_text}]({new_link})"
+ # Absolute path
+ if link_path.startswith("/"):
+ target = root / link_path.lstrip("/")
+ if target.exists():
+ return full_match # Valid, leave as-is
+
+            # Broken — try fallback search
+ resolved, candidates = _search_fallback(link_path, link_path.lstrip("/"))
+ if resolved is not None:
+ new_link = generate_link(
+ resolved, root, doc_path, link_mode, github_url, github_ref, fragment
+ )
+ changes.append(f" {link_text}: {raw_link} -> {new_link} (fixed broken link)")
+ return f"[{link_text}]({new_link})"
+ if len(candidates) > 1:
+ errors.append(
+ f"Broken link '{raw_link}': ambiguous, matches {[str(c) for c in candidates]}"
+ )
+ else:
+ errors.append(f"Broken link: '{raw_link}' does not exist")
+ return full_match
- if new_match != full_match:
- changes.append(f" {link_text}: .md -> {new_link}")
+        # Relative path — resolve from doc file's directory
+ doc_dir = doc_path.parent
+ resolved_abs = (doc_dir / link_path).resolve()
- return new_match
+ try:
+ rel_to_root = resolved_abs.relative_to(root)
+ except ValueError:
+ errors.append(f"Link '{raw_link}' resolves outside repo root")
+ return full_match
+
+ if resolved_abs.exists():
+            # File exists — convert to appropriate link format
+ new_link = generate_link(
+ rel_to_root, root, doc_path, link_mode, github_url, github_ref, fragment
+ )
+ result = f"[{link_text}]({new_link})"
+ if result != full_match:
+ changes.append(f" {link_text}: {raw_link} -> {new_link}")
+ return result
+
+        # Target doesn't exist — try fallback search
+ resolved, candidates = _search_fallback(link_path, raw_link)
+ if resolved is not None:
+ new_link = generate_link(
+ resolved, root, doc_path, link_mode, github_url, github_ref, fragment
+ )
+ changes.append(f" {link_text}: {raw_link} -> {new_link} (found by search)")
+ return f"[{link_text}]({new_link})"
+ if len(candidates) > 1:
+ errors.append(
+ f"Broken link '{raw_link}': ambiguous, matches {[str(c) for c in candidates]}"
+ )
+ else:
+ errors.append(f"Broken link '{raw_link}': target not found")
+ return full_match
# Split by ignore regions and only process non-ignored parts
regions = split_by_ignore_regions(content)
@@ -347,9 +510,9 @@ def replace_doc_match(match: re.Match[str]) -> str:
for region_content, should_process in regions:
if should_process:
- # Process code links first, then doc links
+ # Process code links first, then all other links
processed = re.sub(code_pattern, replace_code_match, region_content)
- processed = re.sub(doc_pattern, replace_doc_match, processed)
+ processed = re.sub(link_pattern, replace_link_match, processed)
result_parts.append(processed)
else:
result_parts.append(region_content)
@@ -377,6 +540,7 @@ def collect_markdown_files(paths: list[str]) -> list[Path]:
Also auto-links symbols: `Configurable` on same line adds #L fragment.
Supports doc-to-doc linking: [Modules](.md) resolves to modules.md or modules/index.md.
+Validates all file links and fixes broken relative/absolute paths by searching the index.
Usage:
doclinks [options]
@@ -471,8 +635,9 @@ def main() -> None:
sys.exit(1)
print(f"Building file index from {root}...")
- file_index = build_file_index(root)
- doc_index = build_doc_index(root)
+ tracked_files = get_git_tracked_files(root)
+ file_index = build_file_index(root, tracked_files)
+ doc_index = build_doc_index(root, tracked_files)
print(
f"Indexed {sum(len(v) for v in file_index.values())} file paths, {len(doc_index)} doc names"
)
@@ -551,8 +716,6 @@ def on_created(self, event: Any) -> None:
observer.start()
try:
while True:
- import time
-
time.sleep(1)
except KeyboardInterrupt:
observer.stop()
diff --git a/dimos/utils/docs/test_doclinks.py b/dimos/utils/docs/test_doclinks.py
index f1303a2245..968f465cef 100644
--- a/dimos/utils/docs/test_doclinks.py
+++ b/dimos/utils/docs/test_doclinks.py
@@ -21,7 +21,10 @@
build_file_index,
extract_other_backticks,
find_symbol_line,
+ pick_best_candidate,
process_markdown,
+ resolve_candidates,
+ score_path_similarity,
split_by_ignore_regions,
)
import pytest
@@ -520,5 +523,257 @@ def test_ignores_doc_links_in_region(self, file_index, doc_index):
assert "[Configuration](.md) example" in new_content
+class TestPathSimilarity:
+ def test_exact_dir_match(self):
+ """Same directory components should give high score."""
+ candidate = Path("docs/agents/docs/codeblocks.md")
+ score = score_path_similarity(candidate, "docs/agents/docs_agent/codeblocks.md")
+ assert score >= 2 # docs, agents
+
+ def test_partial_match(self):
+ """Some shared dirs should give partial score."""
+ candidate = Path("docs/other/codeblocks.md")
+ score = score_path_similarity(candidate, "docs/agents/docs_agent/codeblocks.md")
+ assert score == 2 # docs dir + filename match
+
+ def test_no_match(self):
+ """Unrelated dirs should give filename-only score."""
+ candidate = Path("src/lib/codeblocks.md")
+ score = score_path_similarity(candidate, "docs/agents/docs_agent/codeblocks.md")
+ assert score == 1 # filename match only, no dir overlap
+
+ def test_pick_best_single(self):
+ """Single candidate always wins."""
+ candidates = [Path("docs/agents/docs/codeblocks.md")]
+ best = pick_best_candidate(candidates, "docs/agents/docs_agent/codeblocks.md")
+ assert best == candidates[0]
+
+ def test_pick_best_disambiguates(self):
+ """Should pick candidate with more directory overlap."""
+ candidates = [
+ Path("docs/other/codeblocks.md"),
+ Path("docs/agents/docs/codeblocks.md"),
+ ]
+ best = pick_best_candidate(candidates, "docs/agents/docs_agent/codeblocks.md")
+ assert best == Path("docs/agents/docs/codeblocks.md")
+
+ def test_pick_best_tie_returns_none(self):
+ """Tied scores should return None."""
+ candidates = [
+ Path("a/x/file.md"),
+ Path("b/x/file.md"),
+ ]
+ best = pick_best_candidate(candidates, "c/x/file.md")
+ assert best is None
+
+
+class TestResolveCandidates:
+ def test_single_candidate(self):
+ candidates = [Path("docs/usage/modules.md")]
+ assert resolve_candidates(candidates, "modules.md") == candidates[0]
+
+ def test_empty_candidates(self):
+ assert resolve_candidates([], "modules.md") is None
+
+ def test_disambiguates(self):
+ candidates = [
+ Path("docs/other/codeblocks.md"),
+ Path("docs/agents/docs/codeblocks.md"),
+ ]
+ result = resolve_candidates(candidates, "docs/agents/docs_agent/codeblocks.md")
+ assert result == Path("docs/agents/docs/codeblocks.md")
+
+ def test_tie_returns_none(self):
+ candidates = [Path("a/x/file.md"), Path("b/x/file.md")]
+ assert resolve_candidates(candidates, "c/x/file.md") is None
+
+
+class TestLinkResolution:
+ def _process(self, content, file_index, doc_index, doc_path=None, link_mode="absolute"):
+ if doc_path is None:
+ doc_path = REPO_ROOT / "docs/usage/test.md"
+ return process_markdown(
+ content,
+ REPO_ROOT,
+ doc_path,
+ file_index,
+ link_mode=link_mode,
+ github_url=None,
+ github_ref="main",
+ doc_index=doc_index,
+ )
+
+ def test_resolves_relative_md_link(self, file_index, doc_index):
+ """Should resolve a valid relative .md link to absolute path."""
+        # docs/usage/configuration.md exists — link from docs/usage/test.md
+ content = "[Configuration](configuration.md)"
+ new_content, changes, errors = self._process(content, file_index, doc_index)
+
+ assert len(errors) == 0
+ assert "configuration.md" in new_content
+
+ def test_validates_absolute_md_link(self, file_index, doc_index):
+ """Valid absolute .md link should be left unchanged."""
+ content = "[Configuration](/docs/usage/configuration.md)"
+ new_content, changes, errors = self._process(content, file_index, doc_index)
+
+ assert len(errors) == 0
+ assert new_content == content
+
+ def test_reports_broken_absolute_md_link(self, file_index, doc_index):
+ """Broken absolute .md link with no match should error."""
+ content = "[Foo](/docs/nonexistent/xyzzy_no_match.md)"
+ new_content, changes, errors = self._process(content, file_index, doc_index)
+
+ assert len(errors) == 1
+ assert "Broken link" in errors[0] or "does not exist" in errors[0]
+
+ def test_searches_broken_relative_link(self, file_index, doc_index):
+ """Broken relative .md link should be resolved by name search if unique."""
+ # Link to a non-existent relative path, but stem matches a known doc
+ content = "[Configuration](../nonexistent/configuration.md)"
+ new_content, changes, errors = self._process(content, file_index, doc_index)
+
+ # Should resolve via search fallback (configuration.md exists)
+ if "configuration" in doc_index and len(doc_index["configuration"]) == 1:
+ assert len(errors) == 0
+ assert len(changes) == 1
+ assert "found by search" in changes[0]
+ else:
+            # Multiple matches — disambiguation should kick in
+ assert len(errors) <= 1
+
+ def test_disambiguates_by_path_similarity(self, file_index, doc_index):
+ """Multiple candidates should be disambiguated by directory overlap."""
+ # Build a custom doc_index with multiple candidates
+ from collections import defaultdict
+
+ custom_doc_index: dict[str, list[Path]] = defaultdict(list)
+ custom_doc_index["testdoc"] = [
+ Path("docs/other/testdoc.md"),
+ Path("docs/agents/docs/testdoc.md"),
+ ]
+
+ content = "[TestDoc](../agents/docs_agent/testdoc.md)"
+ doc_path = REPO_ROOT / "docs/usage/test.md"
+ new_content, changes, errors = process_markdown(
+ content,
+ REPO_ROOT,
+ doc_path,
+ file_index,
+ link_mode="absolute",
+ github_url=None,
+ github_ref="main",
+ doc_index=custom_doc_index,
+ )
+
+ # Should pick docs/agents/docs/testdoc.md (shares "docs", "agents")
+ assert len(errors) == 0
+ assert len(changes) == 1
+ assert "agents/docs/testdoc.md" in new_content
+
+ def test_skips_url_md_links(self, file_index, doc_index):
+ """HTTP(S) .md links should be left untouched."""
+ content = "[External](https://example.com/guide.md)"
+ new_content, changes, errors = self._process(content, file_index, doc_index)
+
+ assert len(errors) == 0
+ assert len(changes) == 0
+ assert new_content == content
+
+ def test_preserves_fragment(self, file_index, doc_index):
+ """Fragment (#section) should be preserved in resolved link."""
+ content = "[Config](configuration.md#advanced)"
+ new_content, changes, errors = self._process(content, file_index, doc_index)
+
+ assert "#advanced" in new_content
+
+ def test_skips_backtick_wrapped(self, file_index, doc_index):
+ """Backtick-wrapped .md link text should be skipped by md_link_pattern."""
+ content = "[`configuration.md`](configuration.md)"
+ new_content, changes, errors = self._process(content, file_index, doc_index)
+
+ # The code_pattern handles backtick links; md_link_pattern sees backticks and skips
+ # No double-processing should occur
+ assert "configuration.md" in new_content
+
+ def test_md_links_in_ignore_region(self, file_index, doc_index):
+ """Links in ignore regions should not be processed."""
+ content = (
+ "[Configuration](configuration.md)\n"
+ "\n"
+ "[Configuration](broken_nonexistent.md)\n"
+ "\n"
+ "[Configuration](configuration.md)"
+ )
+ new_content, changes, errors = self._process(content, file_index, doc_index)
+
+ # The broken link in ignore region should not produce errors
+ assert "broken_nonexistent.md" in new_content # Preserved as-is
+
+ def test_validates_absolute_py_link(self, file_index, doc_index):
+ """Valid absolute .py link (without backticks) should be left unchanged."""
+ content = "[spec](/dimos/protocol/service/spec.py)"
+ new_content, changes, errors = self._process(content, file_index, doc_index)
+
+ assert len(errors) == 0
+ assert new_content == content
+
+ def test_broken_py_link_searches_file_index(self, file_index, doc_index):
+ """Broken .py link should fall back to file_index search."""
+ content = "[spec](/nonexistent/path/service/spec.py)"
+ new_content, changes, errors = self._process(content, file_index, doc_index)
+
+        # service/spec.py is unique in file_index — should resolve
+ # But spec.py alone is ambiguous, so it depends on disambiguation
+ # The fallback searches by filename (spec.py) which has multiple matches
+ # pick_best_candidate should resolve via path similarity
+ if len(errors) == 0:
+ assert "fixed broken link" in changes[0]
+ # If ambiguous, at least we get an error not a silent pass
+ else:
+ assert "Broken link" in errors[0]
+
+ def test_validates_directory_link(self, file_index, doc_index):
+ """Valid directory link should be left unchanged."""
+ content = "[examples](/examples/)"
+ doc_path = REPO_ROOT / "docs/test.md"
+ new_content, changes, errors = process_markdown(
+ content,
+ REPO_ROOT,
+ doc_path,
+ file_index,
+ link_mode="absolute",
+ github_url=None,
+ github_ref="main",
+ doc_index=doc_index,
+ )
+
+ if (REPO_ROOT / "examples").exists():
+ assert len(errors) == 0
+ assert new_content == content
+ else:
+            # Directory doesn't exist — should error
+ assert len(errors) == 1
+
+ def test_skips_image_links(self, file_index, doc_index):
+ """Image links  should not be processed."""
+ content = ""
+ new_content, changes, errors = self._process(content, file_index, doc_index)
+
+ assert len(errors) == 0
+ assert len(changes) == 0
+ assert new_content == content
+
+ def test_skips_mailto_links(self, file_index, doc_index):
+ """mailto: links should be left untouched."""
+ content = "[Email](mailto:test@example.com)"
+ new_content, changes, errors = self._process(content, file_index, doc_index)
+
+ assert len(errors) == 0
+ assert len(changes) == 0
+ assert new_content == content
+
+
if __name__ == "__main__":
pytest.main([__file__, "-v"])
diff --git a/docs/agents/docs/index.md b/docs/agents/docs/index.md
index bec2ce79e6..09dabad7ee 100644
--- a/docs/agents/docs/index.md
+++ b/docs/agents/docs/index.md
@@ -8,7 +8,7 @@ If you're showing an API usage pattern, create a minimal working example that ac
After writing a code block in your markdown file, you can run it by executing
`md-babel-py run document.md`
-more information on this tool is in [codeblocks](/docs/agents/docs_agent/codeblocks.md)
+more information on this tool is in [codeblocks](/docs/agents/docs/codeblocks.md)
# Code or Docs Links
@@ -40,15 +40,15 @@ The `Configurable` class is defined in [`service/spec.py`](/dimos/protocol/servi
### Doc-to-doc references
Use `.md` as the link target:
```markdown
-See [Configuration](/docs/api/configuration.md) for more details.
+See [Configuration](/docs/usage/configuration.md) for more details.
```
Becomes:
```markdown
-See [Configuration](/docs/concepts/configuration.md) for more details.
+See [Configuration](/docs/usage/configuration.md) for more details.
```
-More information on this in [doclinks](/docs/agents/docs_agent/doclinks.md)
+More information on this in [doclinks](/docs/agents/docs/doclinks.md)
# Pikchr
diff --git a/docs/development/adding_a_custom_arm.md b/docs/capabilities/manipulation/adding_a_custom_arm.md
similarity index 100%
rename from docs/development/adding_a_custom_arm.md
rename to docs/capabilities/manipulation/adding_a_custom_arm.md
diff --git a/docs/capabilities/manipulation/readme.md b/docs/capabilities/manipulation/readme.md
index 91dada0395..0d6539b75c 100644
--- a/docs/capabilities/manipulation/readme.md
+++ b/docs/capabilities/manipulation/readme.md
@@ -99,6 +99,10 @@ KeyboardTeleopModule βββ ControlCoordinator βββ ManipulationModule
| XArm6 | 6 | Y | Y | β |
| XArm7 | 7 | Y | Y | Y |
+## Adding a Custom Arm
+
+[guide is here](/docs/capabilities/manipulation/adding_a_custom_arm.md)
+
## Key Files
| File | Description |
diff --git a/docs/capabilities/navigation/readme.md b/docs/capabilities/navigation/readme.md
index af26c07f94..f36d795e62 100644
--- a/docs/capabilities/navigation/readme.md
+++ b/docs/capabilities/navigation/readme.md
@@ -3,8 +3,8 @@
## Non-ROS
-- [Go2 Navigation](native/index.md) β column-carving voxel mapping + slope-based costmap
+- [Go2 Navigation](/docs/capabilities/navigation/native/index.md) — column-carving voxel mapping + slope-based costmap
## ROS
-See [ROS Transports](/docs/api/transports.md) for bridging DimOS streams to ROS topics.
+See [ROS Transports](/docs/usage/transports/index.md) for bridging DimOS streams to ROS topics.
diff --git a/docs/development/README.md b/docs/development/README.md
index 130e86fdaa..87936a100d 100644
--- a/docs/development/README.md
+++ b/docs/development/README.md
@@ -222,7 +222,6 @@ This will save the rerun data to `rerun.json` in the current directory.
## Where is `` located? (Architecture)
* If you want to add a `dimos run ` command see [dimos_run.md](/docs/development/dimos_run.md)
-* If you want to add a camera driver see [depth_camera_integration.md](/docs/development/depth_camera_integration.md)
* For edits to manipulation see [manipulation](/dimos/hardware/manipulators/README.md) and the related modules under `dimos/manipulation/`.
* `dimos/core/`: Is where stuff like `Module`, `In`, `Out`, and `RPC` live.
* `dimos/robot/`: Robot-specific modules live here.
diff --git a/docs/development/depth_camera_integration.md b/docs/development/depth_camera_integration.md
deleted file mode 100644
index e152394262..0000000000
--- a/docs/development/depth_camera_integration.md
+++ /dev/null
@@ -1,147 +0,0 @@
-# Depth Camera Integration Guide
-
-This folder contains camera drivers and modules for RGB-D (depth) cameras such as RealSense and ZED.
-Use this guide to add a new depth camera, wire TF correctly, and publish the required streams.
-
-## Add a New Depth Camera
-
-1) **Create a new driver module**
- - Path: `dimos/hardware/sensors/camera//camera.py`
- - Export a blueprint in `/__init__.py` (match the `realsense` / `zed` pattern).
-
-2) **Define config**
- - Inherit from `ModuleConfig` and `DepthCameraConfig`:
- ```python
- @dataclass
- class MyDepthCameraConfig(ModuleConfig, DepthCameraConfig):
- width: int = 1280
- height: int = 720
- fps: int = 15
- camera_name: str = "camera"
- base_frame_id: str = "base_link"
- base_transform: Transform | None = field(default_factory=default_base_transform)
- align_depth_to_color: bool = True
- enable_depth: bool = True
- enable_pointcloud: bool = False
- pointcloud_fps: float = 5.0
- camera_info_fps: float = 1.0
- ```
-
-3) **Implement the module**
- - Inherit from `DepthCameraHardware` and `Module` (see `RealSenseCamera` / `ZEDCamera`).
- - Provide these outputs (matching `RealSenseCamera` / `ZEDCamera`):
- - `color_image: Out[Image]`
- - `depth_image: Out[Image]`
- - `pointcloud: Out[PointCloud2]` (optional, can be disabled by config)
- - `camera_info: Out[CameraInfo]`
- - `depth_camera_info: Out[CameraInfo]`
- - Implement RPCs:
- - `start()` / `stop()`
- - `get_color_camera_info()` / `get_depth_camera_info()`
- - `get_depth_scale()` (meters per depth unit)
-
-4) **Publish frames**
- - Color images: `Image(format=ImageFormat.RGB, frame_id=_color_optical_frame)`
- - Depth images:
- - If `align_depth_to_color`: use `_color_optical_frame`
- - Else: use `_depth_optical_frame`
- - CameraInfo frame_id must match the image frame_id you publish.
-
-5) **Publish camera info**
- - Build `CameraInfo` from camera intrinsics.
- - Publish at `camera_info_fps`.
-
-6) **Publish pointcloud (optional)**
- - Use `PointCloud2.from_rgbd(color_image, depth_image, camera_info, depth_scale)`.
- - Publish at `pointcloud_fps`.
-
-## TF: Required Frames and Transforms
-
-Frame names are defined by the abstract depth camera spec (`dimos/hardware/sensors/camera/spec.py`).
-Use the properties below to ensure consistent naming:
-
-- `_camera_link`: base link for the camera module (usually `{camera_name}_link`)
-- `_color_frame`: non-optical color frame
-- `_color_optical_frame`: optical color frame
-- `_depth_frame`: non-optical depth frame
-- `_depth_optical_frame`: optical depth frame
-
-Recommended transform chain (publish every frame or at your preferred TF rate):
-
-1) **Mounting transform** (from config):
- - `base_frame_id -> _camera_link`
- - Use `config.base_transform` if provided
-
-2) **Depth frame**
- - `_camera_link -> _depth_frame` (identity unless the camera provides extrinsics)
- - `_depth_frame -> _depth_optical_frame` using `OPTICAL_ROTATION`
-
-3) **Color frame**
- - `_camera_link -> _color_frame` (from extrinsics, or identity if unavailable)
- - `_color_frame -> _color_optical_frame` using `OPTICAL_ROTATION`
-
-Notes:
-- If you align depth to color, keep TFs the same but publish depth images in `_color_optical_frame`.
-- Ensure `color_image.frame_id` and `camera_info.frame_id` match. Same for depth.
-
-## Required Streams / Topics
-
-Use these stream names in your module and attach transports as needed.
-Default LCM topics in `realsense` / `zed` demos are shown below.
-
-| Stream name | Type | Suggested topic | Frame ID source |
-|-------------------|--------------|-------------------------|-----------------|
-| `color_image` | `Image` | `/camera/color` | `_color_optical_frame` |
-| `depth_image` | `Image` | `/camera/depth` | `_depth_optical_frame` or `_color_optical_frame` |
-| `pointcloud` | `PointCloud2`| `/camera/pointcloud` | (derived from CameraInfo) |
-| `camera_info` | `CameraInfo` | `/camera/color_info` | matches `color_image` |
-| `depth_camera_info` | `CameraInfo` | `/camera/depth_info` | matches `depth_image` |
-
-For `ObjectSceneRegistrationModule`, the required inputs are:
-- `color_image`
-- `depth_image`
-- `camera_info`
-- TF tree resolving `target_frame` to `color_image.frame_id`
-
-## Object Scene Registration (Brief Overview)
-
-`ObjectSceneRegistrationModule` consumes synchronized RGB + depth + camera intrinsics and produces:
-- 2D detections (YOLOβE)
-- 3D detections (projected via depth + intrinsics + TF)
-- Overlay annotations and aggregated pointclouds
-
-See:
-- `dimos/perception/object_scene_registration.py`
-- `dimos/perception/demo_object_scene_registration.py`
-
-Quick wiring example:
-
-```python
-from dimos.core.blueprints import autoconnect
-from dimos.hardware.sensors.camera.realsense import realsense_camera
-from dimos.perception.object_scene_registration import object_scene_registration_module
-
-pipeline = autoconnect(
- realsense_camera(enable_pointcloud=False),
- object_scene_registration_module(target_frame="world"),
-)
-```
-
-Run the demo via CLI:
-```bash
-dimos run demo-object-scene-registration
-```
-
-## Foxglove (Viewer)
-
-Install Foxglove from:
-- https://foxglove.dev/download
-
-## Modules and Skills (Short Intro)
-
-- **Modules** are typed components with `In[...]` / `Out[...]` streams and `start()` / `stop()` lifecycles.
-- **Skills** are callable methods (decorated with `@skill`) on any `Module`, automatically discovered by agents.
-
-Reference:
-- Modules overview: `/docs/usage/modules.md`
-- TF fundamentals: `/docs/usage/transforms.md`
diff --git a/docs/platforms/humanoid/g1/index.md b/docs/platforms/humanoid/g1/index.md
new file mode 100644
index 0000000000..797c865b20
--- /dev/null
+++ b/docs/platforms/humanoid/g1/index.md
@@ -0,0 +1,167 @@
+# Unitree G1 β Getting Started
+
+The Unitree G1 is a humanoid robot platform with full-body locomotion, arm gesture control, and agentic capabilities — no ROS required for basic operation.
+
+## Requirements
+
+- Unitree G1 (stock firmware)
+- Ubuntu 22.04/24.04 with CUDA GPU (recommended), or macOS (experimental)
+- Python 3.12
+- ZED camera (mounted at chest height) for perception blueprints
+- ROS 2 for navigation (the G1 navigation stack uses ROS nav)
+
+## Install
+
+First, install system dependencies for your platform:
+- [Ubuntu](/docs/installation/ubuntu.md)
+- [macOS](/docs/installation/osx.md)
+- [Nix](/docs/installation/nix.md)
+
+Then install DimOS:
+
+```bash
+uv venv --python "3.12"
+source .venv/bin/activate
+uv pip install dimos[base,unitree]
+```
+
+## MuJoCo Simulation
+
+No hardware? Start with simulation:
+
+```bash
+uv pip install dimos[base,unitree,sim]
+dimos --simulation run unitree-g1-basic-sim
+```
+
+This runs the G1 in MuJoCo with the native A* navigation stack — same blueprint structure, simulated robot. Opens the command center at [localhost:7779](http://localhost:7779) with Rerun 3D visualization.
+
+## Run on Your G1
+
+```bash
+export ROBOT_IP=
+dimos run unitree-g1-basic
+```
+
+DimOS connects via WebRTC, starts the ROS navigation stack, and opens the command center.
+
+### What's Running
+
+| Module | What It Does |
+|--------|-------------|
+| **G1Connection** | WebRTC connection to the robot — streams video, odometry |
+| **Webcam** | ZED camera capture (stereo left, 15 fps) |
+| **VoxelGridMapper** | Builds a 3D voxel map using column-carving (CUDA accelerated) |
+| **CostMapper** | Converts 3D map → 2D costmap via terrain slope analysis |
+| **WavefrontFrontierExplorer** | Autonomous exploration of unmapped areas |
+| **ROSNav** | ROS 2 navigation integration for path planning |
+| **RerunBridge** | 3D visualization in browser |
+| **WebsocketVis** | Command center at localhost:7779 |
+
+### Send Goals
+
+From the command center ([localhost:7779](http://localhost:7779)):
+- Click on the map to set navigation goals
+- Toggle autonomous exploration
+- Monitor robot pose, costmap, and planned path
+
+## Agentic Control
+
+Natural language control with an LLM agent that understands physical space and can command arm gestures:
+
+```bash
+export OPENAI_API_KEY=
+export ROBOT_IP=
+dimos run unitree-g1-agentic
+```
+
+Then use the human CLI:
+
+```bash
+humancli
+> wave hello
+> explore the room
+> give me a high five
+```
+
+The agent subscribes to camera and spatial memory streams and has access to G1-specific skills including arm gestures and movement modes.
+
+### Arm Gestures
+
+The G1 agent can perform expressive arm gestures:
+
+| Gesture | Description |
+|---------|-------------|
+| Handshake | Perform a handshake gesture with the right hand |
+| HighFive | Give a high five with the right hand |
+| Hug | Perform a hugging gesture with both arms |
+| HighWave | Wave with the hand raised high |
+| Clap | Clap hands together |
+| FaceWave | Wave near the face level |
+| LeftKiss | Blow a kiss with the left hand |
+| ArmHeart | Make a heart shape with both arms overhead |
+| RightHeart | Make a heart gesture with the right hand |
+| HandsUp | Raise both hands up in the air |
+| RightHandUp | Raise only the right hand up |
+| Reject | Make a rejection or "no" gesture |
+| CancelAction | Cancel any current arm action and return to neutral |
+
+### Movement Modes
+
+| Mode | Description |
+|------|-------------|
+| WalkMode | Normal walking |
+| WalkControlWaist | Walking with waist control |
+| RunMode | Running |
+
+## Keyboard Teleop
+
+Direct keyboard control via a pygame-based joystick:
+
+```bash
+export ROBOT_IP=
+dimos run unitree-g1-joystick
+```
+
+## Available Blueprints
+
+| Blueprint | Description |
+|-----------|-------------|
+| `unitree-g1-basic` | Connection + ROS navigation + visualization |
+| `unitree-g1-basic-sim` | Simulation with A* navigation |
+| `unitree-g1` | Navigation + perception + spatial memory |
+| `unitree-g1-sim` | Simulation with perception + spatial memory |
+| `unitree-g1-agentic` | Full stack with LLM agent and G1 skills |
+| `unitree-g1-agentic-sim` | Agentic stack in simulation |
+| `unitree-g1-full` | Agentic + SHM image transport + keyboard teleop |
+| `unitree-g1-joystick` | Navigation + keyboard teleop |
+| `unitree-g1-detection` | Navigation + YOLO person detection and tracking |
+| `unitree-g1-shm` | Navigation + perception with shared memory image transport |
+| `unitree-g1-primitive-no-nav` | Sensors + visualization only (no navigation, base for custom blueprints) |
+
+### Blueprint Hierarchy
+
+Blueprints compose incrementally:
+
+```
+primitive (sensors + vis)
+├── basic (+ connection + navigation)
+│   ├── basic-sim (sim connection + A* nav)
+│   ├── joystick (+ keyboard teleop)
+│   └── detection (+ YOLO person tracking)
+├── perceptive (+ spatial memory + object tracking)
+│   ├── sim (sim variant)
+│   └── shm (+ shared memory transport)
+└── agentic (+ LLM agent + G1 skills)
+    ├── agentic-sim (sim variant)
+    └── full (+ SHM + keyboard teleop)
+```
+
+## Deep Dive
+
+- [Navigation Stack](/docs/capabilities/navigation/readme.md) — path planning and autonomous exploration
+- [Visualization](/docs/usage/visualization.md) — Rerun, Foxglove, performance tuning
+- [Data Streams](/docs/usage/data_streams) — RxPY streams, backpressure, quality filtering
+- [Transports](/docs/usage/transports/index.md) — LCM, SHM, DDS
+- [Blueprints](/docs/usage/blueprints.md) — composing modules
+- [Agents](/docs/capabilities/agents/readme.md) — LLM agent framework
diff --git a/docs/platforms/quadruped/go2/index.md b/docs/platforms/quadruped/go2/index.md
index 40f32bcdd2..ab9e6c202d 100644
--- a/docs/platforms/quadruped/go2/index.md
+++ b/docs/platforms/quadruped/go2/index.md
@@ -11,9 +11,9 @@ The Unitree Go2 is DimOS's primary reference platform. Full autonomous navigatio
## Install
First, install system dependencies for your platform:
-- [Ubuntu](../../../installation/ubuntu.md)
-- [macOS](../../../installation/osx.md)
-- [Nix](../../../installation/nix.md)
+- [Ubuntu](/docs/installation/ubuntu.md)
+- [macOS](/docs/installation/osx.md)
+- [Nix](/docs/installation/nix.md)
Then install DimOS:
@@ -35,14 +35,33 @@ Opens the command center at [localhost:7779](http://localhost:7779) with Rerun 3
## Run on Your Go2
+### Pre-flight checks
+
+1. Robot is reachable with low latency (<10 ms) and 0% packet loss
+```bash
+ping $ROBOT_IP
+```
+
+2. Built-in obstacle avoidance is on. (DimOS handles path planning, but the onboard obstacle avoidance provides an extra safety layer around tight spots)
+
+3. If video is not in sync with lidar/robot position, sync your clock with an NTP server
+
+```bash
+sudo ntpdate pool.ntp.org
+```
+or
+```bash
+sudo sntp -sS pool.ntp.org
+```
+
+### Ready to run DimOS
+
```bash
export ROBOT_IP=
dimos run unitree-go2
```
-That's it. DimOS connects via WebRTC (no jailbreak required), starts the full navigation stack, and opens the command center.
-
-> **Tip:** Keep the Unitree built-in obstacle avoidance enabled on the robot for now. DimOS handles path planning, but the onboard obstacle avoidance provides an extra safety layer.
+That's it. DimOS connects via WebRTC (no jailbreak required), starts the full navigation stack, and opens the command center in your browser.
### What's Running
@@ -106,8 +125,8 @@ The agent subscribes to camera, LiDAR, and spatial memory streams β it sees wh
## Deep Dive
-- [Navigation Stack](../../../capabilities/navigation/native/index.md) β column-carving voxel mapping, costmap generation, A* planning
-- [Visualization](../../../usage/visualization.md) β Rerun, Foxglove, performance tuning
-- [Data Streams](../../../usage/data_streams/) β RxPY streams, backpressure, quality filtering
-- [Transports](../../../usage/transports/index.md) β LCM, SHM, DDS
-- [Blueprints](../../../usage/blueprints.md) β composing modules
+- [Navigation Stack](/docs/capabilities/navigation/native/index.md) — column-carving voxel mapping, costmap generation, A* planning
+- [Visualization](/docs/usage/visualization.md) — Rerun, Foxglove, performance tuning
+- [Data Streams](/docs/usage/data_streams) — RxPY streams, backpressure, quality filtering
+- [Transports](/docs/usage/transports/index.md) — LCM, SHM, DDS
+- [Blueprints](/docs/usage/blueprints.md) — composing modules
diff --git a/docs/usage/data_streams/README.md b/docs/usage/data_streams/README.md
index dc2ce6c91d..870c25fb34 100644
--- a/docs/usage/data_streams/README.md
+++ b/docs/usage/data_streams/README.md
@@ -6,11 +6,11 @@ Dimos uses reactive streams (RxPY) to handle sensor data. This approach naturall
| Guide | Description |
|----------------------------------------------|---------------------------------------------------------------|
-| [ReactiveX Fundamentals](reactivex.md) | Observables, subscriptions, and disposables |
-| [Advanced Streams](advanced_streams.md) | Backpressure, parallel subscribers, synchronous getters |
-| [Quality-Based Filtering](quality_filter.md) | Select highest quality frames when downsampling streams |
-| [Temporal Alignment](temporal_alignment.md) | Match messages from multiple sensors by timestamp |
-| [Storage & Replay](storage_replay.md) | Record sensor streams to disk and replay with original timing |
+| [ReactiveX Fundamentals](/docs/usage/data_streams/reactivex.md) | Observables, subscriptions, and disposables |
+| [Advanced Streams](/docs/usage/data_streams/advanced_streams.md) | Backpressure, parallel subscribers, synchronous getters |
+| [Quality-Based Filtering](/docs/usage/data_streams/quality_filter.md) | Select highest quality frames when downsampling streams |
+| [Temporal Alignment](/docs/usage/data_streams/temporal_alignment.md) | Match messages from multiple sensors by timestamp |
+| [Storage & Replay](/docs/usage/data_streams/storage_replay.md) | Record sensor streams to disk and replay with original timing |
## Quick Example
diff --git a/docs/usage/data_streams/advanced_streams.md b/docs/usage/data_streams/advanced_streams.md
index 187d432af2..e9d9f1d12d 100644
--- a/docs/usage/data_streams/advanced_streams.md
+++ b/docs/usage/data_streams/advanced_streams.md
@@ -1,6 +1,6 @@
# Advanced Stream Handling
-> **Prerequisite:** Read [ReactiveX Fundamentals](reactivex.md) first for Observable basics.
+> **Prerequisite:** Read [ReactiveX Fundamentals](/docs/usage/data_streams/reactivex.md) first for Observable basics.
## Backpressure and Parallel Subscribers to Hardware
@@ -126,7 +126,7 @@ class MLModel(Module):
Sometimes you don't want a stream, you just want to call a function and get the latest value.
-If you are doing this periodically as a part of a processing loop, it is very likely that your code will be much cleaner and safer using actual reactivex pipeline. So bias towards checking our [reactivex quick guide](reactivex.md) and [official docs](https://rxpy.readthedocs.io/)
+If you are doing this periodically as a part of a processing loop, it is very likely that your code will be much cleaner and safer using an actual ReactiveX pipeline. So bias towards checking our [reactivex quick guide](/docs/usage/data_streams/reactivex.md) and [official docs](https://rxpy.readthedocs.io/)
(TODO we should actually make this example actually executable)
diff --git a/docs/usage/data_streams/temporal_alignment.md b/docs/usage/data_streams/temporal_alignment.md
index 66230c9d54..c428c04e2e 100644
--- a/docs/usage/data_streams/temporal_alignment.md
+++ b/docs/usage/data_streams/temporal_alignment.md
@@ -34,7 +34,7 @@ Below we set up replay of real camera and lidar data from the Unitree Go2 robot.
Stream Setup
-You can read more about [sensor storage here](storage_replay.md) and [LFS data storage here](/docs/development/large_file_management.md).
+You can read more about [sensor storage here](/docs/usage/data_streams/storage_replay.md) and [LFS data storage here](/docs/development/large_file_management.md).
```python session=align no-result
from reactivex import Subject
@@ -196,7 +196,7 @@ plot_alignment_timeline(video_frames, lidar_scans, aligned_pairs, '{output}')
## Combine Frame Alignment with a Quality Filter
-More on [quality filtering here](quality_filter.md).
+More on [quality filtering here](/docs/usage/data_streams/quality_filter.md).
```python session=align
from dimos.msgs.sensor_msgs.Image import Image, sharpness_barrier
diff --git a/docs/usage/lcm.md b/docs/usage/lcm.md
index 99437a2458..d089cfcdd3 100644
--- a/docs/usage/lcm.md
+++ b/docs/usage/lcm.md
@@ -7,7 +7,7 @@ The LCM project provides pubsub clients and code generators for many languages.
Our messages are ported from ROS (they are structurally compatible in order to facilitate easy communication to ROS if needed)
Repo that hosts our message definitions and autogenerators is at [dimos-lcm](https://github.com/dimensionalOS/dimos-lcm/)
-our LCM implementation significantly [outperforms ROS for local communication](/docs/usage/transports.md#benchmarks)
+our LCM implementation significantly [outperforms ROS for local communication](/docs/usage/transports/index.md#benchmarks)
## Supported languages
diff --git a/docs/usage/sensor_streams/README.md b/docs/usage/sensor_streams/README.md
index dc2ce6c91d..0bf61e98ef 100644
--- a/docs/usage/sensor_streams/README.md
+++ b/docs/usage/sensor_streams/README.md
@@ -6,11 +6,11 @@ Dimos uses reactive streams (RxPY) to handle sensor data. This approach naturall
| Guide | Description |
|----------------------------------------------|---------------------------------------------------------------|
-| [ReactiveX Fundamentals](reactivex.md) | Observables, subscriptions, and disposables |
-| [Advanced Streams](advanced_streams.md) | Backpressure, parallel subscribers, synchronous getters |
-| [Quality-Based Filtering](quality_filter.md) | Select highest quality frames when downsampling streams |
-| [Temporal Alignment](temporal_alignment.md) | Match messages from multiple sensors by timestamp |
-| [Storage & Replay](storage_replay.md) | Record sensor streams to disk and replay with original timing |
+| [ReactiveX Fundamentals](/docs/usage/sensor_streams/reactivex.md) | Observables, subscriptions, and disposables |
+| [Advanced Streams](/docs/usage/sensor_streams/advanced_streams.md) | Backpressure, parallel subscribers, synchronous getters |
+| [Quality-Based Filtering](/docs/usage/sensor_streams/quality_filter.md) | Select highest quality frames when downsampling streams |
+| [Temporal Alignment](/docs/usage/sensor_streams/temporal_alignment.md) | Match messages from multiple sensors by timestamp |
+| [Storage & Replay](/docs/usage/sensor_streams/storage_replay.md) | Record sensor streams to disk and replay with original timing |
## Quick Example
diff --git a/docs/usage/sensor_streams/advanced_streams.md b/docs/usage/sensor_streams/advanced_streams.md
index 187d432af2..c2cd0dbfca 100644
--- a/docs/usage/sensor_streams/advanced_streams.md
+++ b/docs/usage/sensor_streams/advanced_streams.md
@@ -1,6 +1,6 @@
# Advanced Stream Handling
-> **Prerequisite:** Read [ReactiveX Fundamentals](reactivex.md) first for Observable basics.
+> **Prerequisite:** Read [ReactiveX Fundamentals](/docs/usage/sensor_streams/reactivex.md) first for Observable basics.
## Backpressure and Parallel Subscribers to Hardware
@@ -126,7 +126,7 @@ class MLModel(Module):
Sometimes you don't want a stream, you just want to call a function and get the latest value.
-If you are doing this periodically as a part of a processing loop, it is very likely that your code will be much cleaner and safer using actual reactivex pipeline. So bias towards checking our [reactivex quick guide](reactivex.md) and [official docs](https://rxpy.readthedocs.io/)
+If you are doing this periodically as a part of a processing loop, it is very likely that your code will be much cleaner and safer using an actual ReactiveX pipeline. So bias towards checking our [reactivex quick guide](/docs/usage/sensor_streams/reactivex.md) and [official docs](https://rxpy.readthedocs.io/)
(TODO we should actually make this example actually executable)
diff --git a/docs/usage/sensor_streams/temporal_alignment.md b/docs/usage/sensor_streams/temporal_alignment.md
index 66230c9d54..7d1ad074f2 100644
--- a/docs/usage/sensor_streams/temporal_alignment.md
+++ b/docs/usage/sensor_streams/temporal_alignment.md
@@ -34,7 +34,7 @@ Below we set up replay of real camera and lidar data from the Unitree Go2 robot.
Stream Setup
-You can read more about [sensor storage here](storage_replay.md) and [LFS data storage here](/docs/development/large_file_management.md).
+You can read more about [sensor storage here](/docs/usage/sensor_streams/storage_replay.md) and [LFS data storage here](/docs/development/large_file_management.md).
```python session=align no-result
from reactivex import Subject
@@ -196,7 +196,7 @@ plot_alignment_timeline(video_frames, lidar_scans, aligned_pairs, '{output}')
## Combine Frame Alignment with a Quality Filter
-More on [quality filtering here](quality_filter.md).
+More on [quality filtering here](/docs/usage/sensor_streams/quality_filter.md).
```python session=align
from dimos.msgs.sensor_msgs.Image import Image, sharpness_barrier
diff --git a/docs/usage/transports.md b/docs/usage/transports.md
deleted file mode 100644
index 4c80776531..0000000000
--- a/docs/usage/transports.md
+++ /dev/null
@@ -1,437 +0,0 @@
-# Transports
-
-Transports connect **module streams** across **process boundaries** and/or **networks**.
-
-* **Module**: a running component (e.g., camera, mapping, nav).
-* **Stream**: a unidirectional flow of messages owned by a module (one broadcaster β many receivers).
-* **Topic**: the name/identifier used by a transport or pubsub backend.
-* **Message**: payload carried on a stream (often `dimos.msgs.*`, but can be bytes / images / pointclouds / etc.).
-
-Each edge in the graph is a **transported stream** (potentially different protocols). Each node is a **module**:
-
-
-
-## What the transport layer guarantees (and what it doesnβt)
-
-Modules **donβt** know or care *how* data moves. They just:
-
-* emit messages (broadcast)
-* subscribe to messages (receive)
-
-A transport is responsible for the mechanics of delivery (IPC, sockets, Redis, ROS 2, etc.).
-
-**Important:** delivery semantics depend on the backend:
-
-* Some are **best-effort** (e.g., UDP multicast / LCM): loss can happen.
-* Some can be **reliable** (e.g., TCP-backed, Redis, some DDS configs) but may add latency/backpressure.
-
-So: treat the API as uniform, but pick a backend whose semantics match the task.
-
----
-
-## Benchmarks
-
-Quick view on performance of our pubsub backends:
-
-```sh skip
-python -m pytest -svm tool -k "not bytes" dimos/protocol/pubsub/benchmark/test_benchmark.py
-```
-
-
-
----
-
-## Abstraction layers
-
-Pikchr
-
-```pikchr output=assets/abstraction_layers.svg fold
-color = white
-fill = none
-linewid = 0.5in
-boxwid = 1.0in
-boxht = 0.4in
-
-# Boxes with labels
-B: box "Blueprints" rad 10px
-arrow
-M: box "Modules" rad 5px
-arrow
-T: box "Transports" rad 5px
-arrow
-P: box "PubSub" rad 5px
-
-# Descriptions below
-text "robot configs" at B.s + (0.1, -0.2in)
-text "camera, nav" at M.s + (0, -0.2in)
-text "LCM, SHM, ROS" at T.s + (0, -0.2in)
-text "pub/sub API" at P.s + (0, -0.2in)
-```
-
-
-
-
-
-
-Weβll go through these layers top-down.
-
----
-
-## Using transports with blueprints
-
-See [Blueprints](blueprints.md) for the blueprint API.
-
-From [`unitree/go2/blueprints/__init__.py`](/dimos/robot/unitree/go2/blueprints/__init__.py).
-
-Example: rebind a few streams from the default `LCMTransport` to `ROSTransport` (defined at [`transport.py`](/dimos/core/transport.py#L226)) so you can visualize in **rviz2**.
-
-```python skip
-nav = autoconnect(
- basic,
- voxel_mapper(voxel_size=0.1),
- cost_mapper(),
- replanning_a_star_planner(),
- wavefront_frontier_explorer(),
-).global_config(n_dask_workers=6, robot_model="unitree_go2")
-
-ros = nav.transports(
- {
- ("lidar", PointCloud2): ROSTransport("lidar", PointCloud2),
- ("global_map", PointCloud2): ROSTransport("global_map", PointCloud2),
- ("odom", PoseStamped): ROSTransport("odom", PoseStamped),
- ("color_image", Image): ROSTransport("color_image", Image),
- }
-)
-```
-
----
-
-## Using transports with modules
-
-Each **stream** on a module can use a different transport. Set `.transport` on the stream **before starting** modules.
-
-```python ansi=false
-import time
-
-from dimos.core import In, Module, start
-from dimos.core.transport import LCMTransport
-from dimos.hardware.sensors.camera.module import CameraModule
-from dimos.msgs.sensor_msgs import Image
-
-
-class ImageListener(Module):
- image: In[Image]
-
- def start(self):
- super().start()
- self.image.subscribe(lambda img: print(f"Received: {img.shape}"))
-
-
-if __name__ == "__main__":
- # Start local cluster and deploy modules to separate processes
- dimos = start(2)
-
- camera = dimos.deploy(CameraModule, frequency=2.0)
- listener = dimos.deploy(ImageListener)
-
- # Choose a transport for the stream (example: LCM typed channel)
- camera.color_image.transport = LCMTransport("/camera/rgb", Image)
-
- # Connect listener input to camera output
- listener.image.connect(camera.color_image)
-
- camera.start()
- listener.start()
-
- time.sleep(2)
- dimos.stop()
-```
-
-
-
-```
-Initialized dimos local cluster with 2 workers, memory limit: auto
-2026-01-24T13:17:50.190559Z [info ] Deploying module. [dimos/core/__init__.py] module=CameraModule
-2026-01-24T13:17:50.218466Z [info ] Deployed module. [dimos/core/__init__.py] module=CameraModule worker_id=1
-2026-01-24T13:17:50.229474Z [info ] Deploying module. [dimos/core/__init__.py] module=ImageListener
-2026-01-24T13:17:50.250199Z [info ] Deployed module. [dimos/core/__init__.py] module=ImageListener worker_id=0
-Received: (480, 640, 3)
-Received: (480, 640, 3)
-Received: (480, 640, 3)
-```
-
-See [Modules](modules.md) for more on module architecture.
-
----
-
-## Inspecting LCM traffic (CLI)
-
-`lcmspy` shows topic frequency/bandwidth stats:
-
-
-
-`dimos topic echo /topic` listens on typed channels like `/topic#pkg.Msg` and decodes automatically:
-
-```sh skip
-Listening on /camera/rgb (inferring from typed LCM channels like '/camera/rgb#pkg.Msg')... (Ctrl+C to stop)
-Image(shape=(480, 640, 3), format=RGB, dtype=uint8, dev=cpu, ts=2026-01-24 20:28:59)
-```
-
----
-
-## Implementing a transport
-
-At the stream layer, a transport is implemented by subclassing `Transport` (see [`core/stream.py`](/dimos/core/stream.py#L83)) and implementing:
-
-* `broadcast(...)`
-* `subscribe(...)`
-
-Your `Transport.__init__` args can be anything meaningful for your backend:
-
-* `(ip, port)`
-* a shared-memory segment name
-* a filesystem path
-* a Redis channel
-
-Encoding is an implementation detail, but we encourage using LCM-compatible message types when possible.
-
-### Encoding helpers
-
-Many of our message types provide `lcm_encode` / `lcm_decode` for compact, language-agnostic binary encoding (often faster than pickle). For details, see [LCM](/docs/usage/lcm.md).
-
----
-
-## PubSub transports
-
-Even though transport can be anything (TCP connection, unix socket) for now all our transport backends implement the `PubSub` interface.
-
-* `publish(topic, message)`
-* `subscribe(topic, callback) -> unsubscribe`
-
-```python
-from dimos.protocol.pubsub.spec import PubSub
-import inspect
-
-print(inspect.getsource(PubSub.publish))
-print(inspect.getsource(PubSub.subscribe))
-```
-
-
-```python
- @abstractmethod
- def publish(self, topic: TopicT, message: MsgT) -> None:
- """Publish a message to a topic."""
- ...
-
- @abstractmethod
- def subscribe(
- self, topic: TopicT, callback: Callable[[MsgT, TopicT], None]
- ) -> Callable[[], None]:
- """Subscribe to a topic with a callback. returns unsubscribe function"""
- ...
-```
-
-Topic/message types are flexible: bytes, JSON, or our ROS-compatible [LCM](/docs/usage/lcm.md) types. We also have pickle-based transports for arbitrary Python objects.
-
-### LCM (UDP multicast)
-
-LCM is UDP multicast. Itβs very fast on a robot LAN, but itβs **best-effort** (packets can drop).
-For local emission it autoconfigures system in a way in which it's more robust and faster then other more common protocols like ROS, DDS
-
-```python
-from dimos.protocol.pubsub.lcmpubsub import LCM, Topic
-from dimos.msgs.geometry_msgs import Vector3
-
-lcm = LCM(autoconf=True)
-lcm.start()
-
-received = []
-topic = Topic("/robot/velocity", Vector3)
-
-lcm.subscribe(topic, lambda msg, t: received.append(msg))
-lcm.publish(topic, Vector3(1.0, 0.0, 0.5))
-
-import time
-time.sleep(0.1)
-
-print(f"Received velocity: x={received[0].x}, y={received[0].y}, z={received[0].z}")
-lcm.stop()
-```
-
-
-```
-Received velocity: x=1.0, y=0.0, z=0.5
-```
-
-### Shared memory (IPC)
-
-Shared memory is highest performance, but only works on the **same machine**.
-
-```python
-from dimos.protocol.pubsub.shmpubsub import PickleSharedMemory
-
-shm = PickleSharedMemory(prefer="cpu")
-shm.start()
-
-received = []
-shm.subscribe("test/topic", lambda msg, topic: received.append(msg))
-shm.publish("test/topic", {"data": [1, 2, 3]})
-
-import time
-time.sleep(0.1)
-
-print(f"Received: {received}")
-shm.stop()
-```
-
-
-```
-Received: [{'data': [1, 2, 3]}]
-```
-
-### DDS Transport
-
-For network communication, DDS uses the Data Distribution Service (DDS) protocol:
-
-```python session=dds_demo ansi=false
-from dataclasses import dataclass
-from cyclonedds.idl import IdlStruct
-
-from dimos.protocol.pubsub.impl.ddspubsub import DDS, Topic
-
-@dataclass
-class SensorReading(IdlStruct):
- value: float
-
-dds = DDS()
-dds.start()
-
-received = []
-sensor_topic = Topic(name="sensors/temperature", data_type=SensorReading)
-
-dds.subscribe(sensor_topic, lambda msg, t: received.append(msg))
-dds.publish(sensor_topic, SensorReading(value=22.5))
-
-import time
-time.sleep(0.1)
-
-print(f"Received: {received}")
-dds.stop()
-```
-
-
-```
-Received: [SensorReading(value=22.5)]
-```
-
----
-
-## A minimal transport: `Memory`
-
-The simplest toy backend is `Memory` (single process). Start from there when implementing a new pubsub backend.
-
-```python
-from dimos.protocol.pubsub.memory import Memory
-
-bus = Memory()
-received = []
-
-unsubscribe = bus.subscribe("sensor/data", lambda msg, topic: received.append(msg))
-
-bus.publish("sensor/data", {"temperature": 22.5})
-bus.publish("sensor/data", {"temperature": 23.0})
-
-print(f"Received {len(received)} messages:")
-for msg in received:
- print(f" {msg}")
-
-unsubscribe()
-```
-
-
-```
-Received 2 messages:
- {'temperature': 22.5}
- {'temperature': 23.0}
-```
-
-See [`memory.py`](/dimos/protocol/pubsub/impl/memory.py) for the complete source.
-
----
-
-## Encode/decode mixins
-
-Transports often need to serialize messages before sending and deserialize after receiving.
-
-`PubSubEncoderMixin` at [`pubsub/spec.py`](/dimos/protocol/pubsub/spec.py#L95) provides a clean way to add encoding/decoding to any pubsub implementation.
-
-### Available mixins
-
-| Mixin | Encoding | Use case |
-|----------------------|-----------------|------------------------------------|
-| `PickleEncoderMixin` | Python pickle | Any Python object, Python-only |
-| `LCMEncoderMixin` | LCM binary | Cross-language (C/C++/Python/Go/β¦) |
-| `JpegEncoderMixin` | JPEG compressed | Image data, reduces bandwidth |
-
-`LCMEncoderMixin` is especially useful: you can use LCM message definitions with *any* transport (not just UDP multicast). See [LCM](/docs/usage/lcm.md) for details.
-
-### Creating a custom mixin
-
-```python session=jsonencoder no-result
-from dimos.protocol.pubsub.spec import PubSubEncoderMixin
-import json
-
-class JsonEncoderMixin(PubSubEncoderMixin[str, dict, bytes]):
- def encode(self, msg: dict, topic: str) -> bytes:
- return json.dumps(msg).encode("utf-8")
-
- def decode(self, msg: bytes, topic: str) -> dict:
- return json.loads(msg.decode("utf-8"))
-```
-
-Combine with a pubsub implementation via multiple inheritance:
-
-```python session=jsonencoder no-result
-from dimos.protocol.pubsub.memory import Memory
-
-class MyJsonPubSub(JsonEncoderMixin, Memory):
- pass
-```
-
-Swap serialization by changing the mixin:
-
-```python session=jsonencoder no-result
-from dimos.protocol.pubsub.spec import PickleEncoderMixin
-
-class MyPicklePubSub(PickleEncoderMixin, Memory):
- pass
-```
-
----
-
-## Testing and benchmarks
-
-### Spec tests
-
-See [`pubsub/test_spec.py`](/dimos/protocol/pubsub/test_spec.py) for the grid tests your new backend should pass.
-
-### Benchmarks
-
-Add your backend to benchmarks to compare in context:
-
-```sh skip
-python -m pytest -svm tool -k "not bytes" dimos/protocol/pubsub/benchmark/test_benchmark.py
-```
-
----
-
-# Available transports
-
-| Transport | Use case | Cross-process | Network | Notes |
-|----------------|-------------------------------------|---------------|---------|--------------------------------------|
-| `Memory` | Testing only, single process | No | No | Minimal reference impl |
-| `SharedMemory` | Multi-process on same machine | Yes | No | Highest throughput (IPC) |
-| `LCM` | Robot LAN broadcast (UDP multicast) | Yes | Yes | Best-effort; can drop packets on LAN |
-| `Redis` | Network pubsub via Redis server | Yes | Yes | Central broker; adds hop |
-| `ROS` | ROS 2 topic communication | Yes | Yes | Integrates with RViz/ROS tools |
-| `DDS` | Cyclone DDS without ROS (WIP) | Yes | Yes | WIP |
diff --git a/docs/usage/transports/index.md b/docs/usage/transports/index.md
index 748cf03aa1..1c8745d117 100644
--- a/docs/usage/transports/index.md
+++ b/docs/usage/transports/index.md
@@ -79,7 +79,7 @@ Weβll go through these layers top-down.
## Using transports with blueprints
-See [Blueprints](blueprints.md) for the blueprint API.
+See [Blueprints](/docs/usage/blueprints.md) for the blueprint API.
From [`unitree/go2/blueprints/__init__.py`](/dimos/robot/unitree/go2/blueprints/__init__.py).
@@ -160,7 +160,7 @@ Received: (480, 640, 3)
Received: (480, 640, 3)
```
-See [Modules](modules.md) for more on module architecture.
+See [Modules](/docs/usage/modules.md) for more on module architecture.
---
diff --git a/onnx/metric3d_vit_small.onnx b/onnx/metric3d_vit_small.onnx
deleted file mode 100644
index bfddd41628..0000000000
--- a/onnx/metric3d_vit_small.onnx
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:14805174265dd721ac3b396bd5ee7190c708cec41150ed298267f6c3126bc060
-size 151333865
diff --git a/pyproject.toml b/pyproject.toml
index 9dea7e1921..6471fd89cd 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -119,6 +119,7 @@ misc = [
# Hardware SDKs
"xarm-python-sdk>=1.17.0",
+ "portal",
]
visualization = [
@@ -141,9 +142,6 @@ agents = [
"openai",
"openai-whisper",
"sounddevice",
-
- # MCP Server
- "mcp>=1.0.0",
]
web = [
diff --git a/uv.lock b/uv.lock
index d971dcfeaa..47083b733e 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1791,7 +1791,6 @@ agents = [
{ name = "langchain-ollama" },
{ name = "langchain-openai" },
{ name = "langchain-text-splitters" },
- { name = "mcp" },
{ name = "ollama" },
{ name = "openai" },
{ name = "openai-whisper" },
@@ -1812,7 +1811,6 @@ base = [
{ name = "langchain-openai" },
{ name = "langchain-text-splitters" },
{ name = "lap" },
- { name = "mcp" },
{ name = "moondream" },
{ name = "mujoco" },
{ name = "ollama" },
@@ -1964,6 +1962,7 @@ misc = [
{ name = "onnx" },
{ name = "open-clip-torch" },
{ name = "opencv-contrib-python" },
+ { name = "portal" },
{ name = "python-multipart" },
{ name = "scikit-learn", version = "1.7.2", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
{ name = "scikit-learn", version = "1.8.0", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
@@ -2010,7 +2009,6 @@ unitree = [
{ name = "langchain-openai" },
{ name = "langchain-text-splitters" },
{ name = "lap" },
- { name = "mcp" },
{ name = "moondream" },
{ name = "mujoco" },
{ name = "ollama" },
@@ -2088,7 +2086,6 @@ requires-dist = [
{ name = "llvmlite", specifier = ">=0.42.0" },
{ name = "lxml-stubs", marker = "extra == 'dev'", specifier = ">=0.5.1,<1" },
{ name = "matplotlib", marker = "extra == 'manipulation'", specifier = ">=3.7.1" },
- { name = "mcp", marker = "extra == 'agents'", specifier = ">=1.0.0" },
{ name = "md-babel-py", marker = "extra == 'dev'", specifier = "==1.1.1" },
{ name = "moondream", marker = "extra == 'perception'" },
{ name = "mujoco", marker = "extra == 'sim'", specifier = ">=3.3.4" },
@@ -2121,6 +2118,7 @@ requires-dist = [
{ name = "plotly", marker = "extra == 'manipulation'", specifier = ">=5.9.0" },
{ name = "plum-dispatch", specifier = "==2.5.7" },
{ name = "plum-dispatch", marker = "extra == 'docker'", specifier = "==2.5.7" },
+ { name = "portal", marker = "extra == 'misc'" },
{ name = "pre-commit", marker = "extra == 'dev'", specifier = "==4.2.0" },
{ name = "psycopg2-binary", marker = "extra == 'psql'", specifier = ">=2.9.11" },
{ name = "py-spy", marker = "extra == 'dev'" },
@@ -3208,15 +3206,6 @@ http2 = [
{ name = "h2" },
]
-[[package]]
-name = "httpx-sse"
-version = "0.4.3"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/0f/4c/751061ffa58615a32c31b2d82e8482be8dd4a89154f003147acee90f2be9/httpx_sse-0.4.3.tar.gz", hash = "sha256:9b1ed0127459a66014aec3c56bebd93da3c1bc8bb6618c8082039a44889a755d", size = 15943, upload-time = "2025-10-10T21:48:22.271Z" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/d2/fd/6668e5aec43ab844de6fc74927e155a3b37bf40d7c3790e49fc0406b6578/httpx_sse-0.4.3-py3-none-any.whl", hash = "sha256:0ac1c9fe3c0afad2e0ebb25a934a59f4c7823b60792691f779fad2c5568830fc", size = 8960, upload-time = "2025-10-10T21:48:21.158Z" },
-]
-
[[package]]
name = "huggingface-hub"
version = "0.36.2"
@@ -4887,31 +4876,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/27/1a/1f68f9ba0c207934b35b86a8ca3aad8395a3d6dd7921c0686e23853ff5a9/mccabe-0.7.0-py2.py3-none-any.whl", hash = "sha256:6c2d30ab6be0e4a46919781807b4f0d834ebdd6c6e3dca0bda5a15f863427b6e", size = 7350, upload-time = "2022-01-24T01:14:49.62Z" },
]
-[[package]]
-name = "mcp"
-version = "1.26.0"
-source = { registry = "https://pypi.org/simple" }
-dependencies = [
- { name = "anyio" },
- { name = "httpx" },
- { name = "httpx-sse" },
- { name = "jsonschema" },
- { name = "pydantic" },
- { name = "pydantic-settings" },
- { name = "pyjwt", extra = ["crypto"] },
- { name = "python-multipart" },
- { name = "pywin32", marker = "sys_platform == 'win32'" },
- { name = "sse-starlette" },
- { name = "starlette" },
- { name = "typing-extensions" },
- { name = "typing-inspection" },
- { name = "uvicorn", marker = "sys_platform != 'emscripten'" },
-]
-sdist = { url = "https://files.pythonhosted.org/packages/fc/6d/62e76bbb8144d6ed86e202b5edd8a4cb631e7c8130f3f4893c3f90262b10/mcp-1.26.0.tar.gz", hash = "sha256:db6e2ef491eecc1a0d93711a76f28dec2e05999f93afd48795da1c1137142c66", size = 608005, upload-time = "2026-01-24T19:40:32.468Z" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/fd/d9/eaa1f80170d2b7c5ba23f3b59f766f3a0bb41155fbc32a69adfa1adaaef9/mcp-1.26.0-py3-none-any.whl", hash = "sha256:904a21c33c25aa98ddbeb47273033c435e595bbacfdb177f4bd87f6dceebe1ca", size = 233615, upload-time = "2026-01-24T19:40:30.652Z" },
-]
-
[[package]]
name = "md-babel-py"
version = "1.1.1"
@@ -6944,6 +6908,22 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/bf/18/72c216f4ab0c82b907009668f79183ae029116ff0dd245d56ef58aac48e7/polars_runtime_32-1.38.1-cp310-abi3-win_arm64.whl", hash = "sha256:6d07d0cc832bfe4fb54b6e04218c2c27afcfa6b9498f9f6bbf262a00d58cc7c4", size = 41639413, upload-time = "2026-02-06T18:12:22.044Z" },
]
+[[package]]
+name = "portal"
+version = "3.7.4"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "cloudpickle" },
+ { name = "msgpack" },
+ { name = "numpy", version = "2.2.6", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version < '3.11'" },
+ { name = "numpy", version = "2.3.5", source = { registry = "https://pypi.org/simple" }, marker = "python_full_version >= '3.11'" },
+ { name = "psutil" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/57/11/c67a1b771901e4c941fe3dcda763b78a29b6c45308e3ebaf99bac96820d8/portal-3.7.4.tar.gz", hash = "sha256:67234267d1eb319fe790653822d4a8d0e0e5312fb29fd8f440d8287066f478b9", size = 17380, upload-time = "2026-01-12T18:17:45.727Z" }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/c3/14/0f7d227894831d2d7eb7f2c6946e8cad8e86da6135b6f902bb961d948f04/portal-3.7.4-py3-none-any.whl", hash = "sha256:3801a489766d3ec2eb73ca8cefd29c54e166d4cf5cfdf1a079ac93fe1130bedb", size = 23486, upload-time = "2026-01-12T18:17:44.326Z" },
+]
+
[[package]]
name = "portalocker"
version = "3.2.0"
@@ -7693,20 +7673,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217, upload-time = "2025-06-21T13:39:07.939Z" },
]
-[[package]]
-name = "pyjwt"
-version = "2.11.0"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/5c/5a/b46fa56bf322901eee5b0454a34343cdbdae202cd421775a8ee4e42fd519/pyjwt-2.11.0.tar.gz", hash = "sha256:35f95c1f0fbe5d5ba6e43f00271c275f7a1a4db1dab27bf708073b75318ea623", size = 98019, upload-time = "2026-01-30T19:59:55.694Z" }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/6f/01/c26ce75ba460d5cd503da9e13b21a33804d38c2165dec7b716d06b13010c/pyjwt-2.11.0-py3-none-any.whl", hash = "sha256:94a6bde30eb5c8e04fee991062b534071fd1439ef58d2adc9ccb823e7bcd0469", size = 28224, upload-time = "2026-01-30T19:59:54.539Z" },
-]
-
-[package.optional-dependencies]
-crypto = [
- { name = "cryptography" },
-]
-
[[package]]
name = "pylibsrtp"
version = "1.0.0"
|