Merged
1 change: 1 addition & 0 deletions CLAUDE.md
@@ -6,4 +6,5 @@
- The project uses `uv`, `ruff` and `mypy`
- Run commands should be prefixed with `uv`: `uv run ...`
- Use `asyncio` features, if such is needed
- Prefer early returns
- Absolutely no useless comments! Not every class and method needs to be documented (unless it is legitimately complex or "lib-ish")
2 changes: 1 addition & 1 deletion README.md
@@ -30,7 +30,7 @@ make chat
make dev
```

Additional MCP servers are configured in `agent-chat-cli.config.yaml` and prompts added within the `prompts` folder.
Additional MCP servers are configured in `agent-chat-cli.config.yaml`, and prompts are added in the `prompts` folder. By default, MCP servers are loaded dynamically via inference; set `mcp_server_inference: false` to load all servers at startup.

## Development

3 changes: 3 additions & 0 deletions agent-chat-cli.config.yaml
@@ -9,6 +9,9 @@ model: haiku
# Enable streaming responses
include_partial_messages: true

# Enable dynamic MCP server inference
mcp_server_inference: true

# Named agents with custom configurations
# agents:
#   sample_agent:
8 changes: 4 additions & 4 deletions src/agent_chat_cli/app.py
@@ -8,10 +8,10 @@
from agent_chat_cli.components.chat_history import ChatHistory, MessagePosted
from agent_chat_cli.components.thinking_indicator import ThinkingIndicator
from agent_chat_cli.components.user_input import UserInput
from agent_chat_cli.utils import AgentLoop
from agent_chat_cli.utils.message_bus import MessageBus
from agent_chat_cli.system.agent_loop import AgentLoop
from agent_chat_cli.system.message_bus import MessageBus
from agent_chat_cli.system.actions import Actions
from agent_chat_cli.utils.logger import setup_logging
from agent_chat_cli.utils.actions import Actions

from dotenv import load_dotenv

@@ -20,7 +20,7 @@


class AgentChatCLIApp(App):
    CSS_PATH = "utils/styles.tcss"
    CSS_PATH = "system/styles.tcss"

    BINDINGS = [
        Binding("ctrl+c", "quit", "Quit", show=False, priority=True),
2 changes: 1 addition & 1 deletion src/agent_chat_cli/components/user_input.py
@@ -9,7 +9,7 @@
from agent_chat_cli.components.chat_history import MessagePosted
from agent_chat_cli.components.thinking_indicator import ThinkingIndicator
from agent_chat_cli.components.messages import Message
from agent_chat_cli.utils.actions import Actions
from agent_chat_cli.system.actions import Actions
from agent_chat_cli.utils.enums import ControlCommand


79 changes: 72 additions & 7 deletions src/agent_chat_cli/docs/architecture.md
@@ -16,24 +16,34 @@ Textual widgets responsible for UI rendering:
- **UserInput**: Handles user text input and submission
- **ThinkingIndicator**: Shows when agent is processing

### Utils Layer
### System Layer

#### Agent Loop (`agent_loop.py`)
#### Agent Loop (`system/agent_loop.py`)
Manages the conversation loop with Claude SDK:
- Maintains async queue for user queries
- Handles streaming responses
- Parses SDK messages into structured AgentMessage objects
- Emits AgentMessageType events (STREAM_EVENT, ASSISTANT, RESULT)
- Manages session persistence via session_id
- Supports dynamic MCP server inference and loading
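The queue-driven loop described above has roughly this shape (a simplified sketch, not the project's actual code — the real `AgentLoop` forwards each query to the Claude SDK instead of echoing it):

```python
import asyncio

async def agent_loop(queue: asyncio.Queue, received: list[str]) -> None:
    # Pull queries off the async queue until a sentinel arrives;
    # the real loop sends each query to the Claude SDK client.
    while True:
        user_input = await queue.get()
        if user_input is None:  # stand-in for a ControlCommand
            break
        received.append(f"echo:{user_input}")

received: list[str] = []
queue: asyncio.Queue = asyncio.Queue()

async def demo() -> None:
    await queue.put("hello")
    await queue.put(None)  # sentinel stops the loop
    await agent_loop(queue, received)

asyncio.run(demo())
```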

#### MCP Server Inference (`system/mcp_inference.py`)
Intelligently determines which MCP servers are needed for each query:
- Uses a persistent Haiku client for fast inference (~1-3s after initial boot)
- Analyzes user queries to infer required servers
- Maintains a cached set of inferred servers across conversation
- Returns only newly needed servers to minimize reconnections
- Can be disabled via `mcp_server_inference: false` config option
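The caching behaviour can be sketched as follows (a hypothetical sketch: the real `infer_mcp_servers` asks Haiku which servers a query needs, which is replaced here by an explicit `needed` list):

```python
def select_servers(
    needed: list[str],
    available_servers: dict[str, dict],
    inferred_servers: set[str],
) -> dict:
    # Keep only names that are actually configured.
    wanted = {name for name in needed if name in available_servers}
    # Only servers not already connected trigger a reconnect.
    new_servers = sorted(wanted - inferred_servers)
    inferred_servers |= wanted  # cache across the conversation
    selected = {name: available_servers[name] for name in sorted(inferred_servers)}
    return {"new_servers": new_servers, "selected_servers": selected}

available = {"github": {"enabled": True}, "slack": {"enabled": True}}
cache: set[str] = set()

first = select_servers(["github"], available, cache)
second = select_servers(["github", "slack"], available, cache)
```

On the second call only `slack` is reported as new, so only one reconnection is needed.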

#### Message Bus (`message_bus.py`)
#### Message Bus (`system/message_bus.py`)
Routes agent messages to appropriate UI components:
- Handles streaming text updates
- Mounts tool use messages
- Controls thinking indicator state
- Manages scroll-to-bottom behavior
- Displays system messages (e.g., MCP server connection notifications)

#### Actions (`actions.py`)
#### Actions (`system/actions.py`)
Centralizes all user-initiated actions and controls:
- **quit()**: Exits the application
- **query(user_input)**: Sends user query to agent loop queue
@@ -46,15 +56,20 @@ Actions are triggered via:
- Keybindings in app.py (ESC → action_interrupt, Ctrl+N → action_new)
- Text commands in user_input.py ("exit", "clear")

#### Config (`config.py`)
### Utils Layer

#### Config (`utils/config.py`)
Loads and validates YAML configuration:
- Filters disabled MCP servers
- Loads prompts from files
- Expands environment variables
- Combines system prompt with MCP server prompts
- Provides `get_sdk_config()` to filter app-specific config before passing to SDK
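A minimal sketch of the filtering idea behind `get_sdk_config` (the exact set of app-only keys is an assumption, not the project's actual list, and the real function takes a config object rather than a dict):

```python
# Keys the app consumes itself and must not forward to the SDK;
# hypothetical set for illustration.
APP_ONLY_KEYS = {"mcp_server_inference"}

def get_sdk_config(config: dict) -> dict:
    # Drop app-specific options so the rest can be passed to the SDK.
    return {k: v for k, v in config.items() if k not in APP_ONLY_KEYS}

cfg = {"model": "haiku", "include_partial_messages": True, "mcp_server_inference": True}
sdk_cfg = get_sdk_config(cfg)
```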

## Data Flow

### Standard Query Flow (with MCP Inference enabled)

```
User Input
@@ -64,7 +79,16 @@
Actions.query(user_input) → AgentLoop.query_queue.put()
Claude SDK (streaming response)
AgentLoop: MCP Server Inference (if enabled)
infer_mcp_servers(user_message) → Haiku query
If new servers needed:
- Post SYSTEM message ("Connecting to [servers]...")
- Disconnect client
- Reconnect with new servers (preserving session_id)
Claude SDK (streaming response with connected MCP tools)
AgentLoop._handle_message
@@ -73,9 +97,26 @@ AgentMessage (typed message) → MessageBus.handle_agent_message
Match on AgentMessageType:
- STREAM_EVENT → Update streaming message widget
- ASSISTANT → Mount tool use widgets
- SYSTEM → Display system notification
- RESULT → Reset thinking indicator
```

### Query Flow (with MCP Inference disabled)

```
User Input
UserInput.on_input_submitted
MessagePosted event → ChatHistory (immediate UI update)
Actions.query(user_input) → AgentLoop.query_queue.put()
Claude SDK (all servers pre-connected at startup)
[Same as above from _handle_message onwards]
```

### Control Commands Flow
```
User Action (ESC, Ctrl+N, "clear", "exit")
```

@@ -138,14 +179,38 @@ class Message:
Configuration is loaded from `agent-chat-cli.config.yaml`:
- **system_prompt**: Base system prompt (supports file paths)
- **model**: Claude model to use
- **include_partial_messages**: Enable streaming
- **include_partial_messages**: Enable streaming responses (default: true)
- **mcp_server_inference**: Enable dynamic MCP server inference (default: true)
  - When `true`: App boots instantly without MCP servers, connects only when needed
  - When `false`: All enabled MCP servers load at startup (traditional behavior)
- **mcp_servers**: MCP server configurations (filtered by enabled flag)
- **agents**: Named agent configurations
- **disallowed_tools**: Tool filtering
- **permission_mode**: Permission handling mode

MCP server prompts are automatically appended to the system prompt.
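The combination step can be sketched as follows (a hypothetical helper for illustration; the real logic lives in `utils/config.py`):

```python
def build_system_prompt(base: str, server_prompts: dict[str, str]) -> str:
    # Append each non-empty MCP server prompt after the base prompt.
    parts = [base] + [p for p in server_prompts.values() if p]
    return "\n\n".join(parts)

prompt = build_system_prompt(
    "You are a helpful CLI agent.",
    {"github": "Use the GitHub tools for code search.", "disabled": ""},
)
```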

### MCP Server Inference

When `mcp_server_inference: true` (default):

1. **Fast Boot**: App starts without connecting to any MCP servers
2. **Smart Detection**: Before each query, Haiku analyzes which servers are needed
3. **Dynamic Loading**: Only connects to newly required servers
4. **Session Preservation**: Maintains conversation history when reconnecting with new servers
5. **Performance**: ~1-3s inference latency after initial boot (first query ~8-12s)

Example config:
```yaml
mcp_server_inference: true # or false to disable

mcp_servers:
  github:
    description: "Search code, PRs, issues"
    enabled: true
    # ... rest of config
```

## User Commands

### Text Commands
@@ -1,4 +1,4 @@
from agent_chat_cli.utils.agent_loop import AgentLoop
from agent_chat_cli.system.agent_loop import AgentLoop
from agent_chat_cli.utils.enums import ControlCommand
from agent_chat_cli.components.chat_history import ChatHistory
from agent_chat_cli.components.thinking_indicator import ThinkingIndicator
@@ -13,8 +13,14 @@
    ToolUseBlock,
)

from agent_chat_cli.utils.config import load_config
from agent_chat_cli.utils.config import (
    load_config,
    get_available_servers,
    get_sdk_config,
)
from agent_chat_cli.utils.enums import AgentMessageType, ContentType, ControlCommand
from agent_chat_cli.system.mcp_inference import infer_mcp_servers
from agent_chat_cli.utils.logger import log_json


@dataclass
@@ -31,34 +37,93 @@ def __init__(
    ) -> None:
        self.config = load_config()
        self.session_id = session_id
        self.available_servers = get_available_servers()
        self.inferred_servers: set[str] = set()

        config_dict = self.config.model_dump()
        if session_id:
            config_dict["resume"] = session_id

        self.client = ClaudeSDKClient(options=ClaudeAgentOptions(**config_dict))
        self.client: ClaudeSDKClient

        self.on_message = on_message
        self.query_queue: asyncio.Queue[str | ControlCommand] = asyncio.Queue()

        self._running = False
        self.interrupting = False

    async def start(self) -> None:
    async def _initialize_client(self, mcp_servers: dict) -> None:
        sdk_config = get_sdk_config(self.config)
        sdk_config["mcp_servers"] = mcp_servers

        if self.session_id:
            sdk_config["resume"] = self.session_id

        self.client = ClaudeSDKClient(options=ClaudeAgentOptions(**sdk_config))

        await self.client.connect()

    async def start(self) -> None:
        if self.config.mcp_server_inference:
            await self._initialize_client(mcp_servers={})
        else:
            mcp_servers = {
                name: config.model_dump()
                for name, config in self.available_servers.items()
            }

            await self._initialize_client(mcp_servers=mcp_servers)

        self._running = True

        while self._running:
            user_input = await self.query_queue.get()

            if isinstance(user_input, ControlCommand):
                if user_input == ControlCommand.NEW_CONVERSATION:
                    self.inferred_servers.clear()

                    await self.client.disconnect()
                    await self.client.connect()

                    if self.config.mcp_server_inference:
                        await self._initialize_client(mcp_servers={})
                    else:
                        mcp_servers = {
                            name: config.model_dump()
                            for name, config in self.available_servers.items()
                        }

                        await self._initialize_client(mcp_servers=mcp_servers)
                continue

            if self.config.mcp_server_inference:
                inference_result = await infer_mcp_servers(
                    user_message=user_input,
                    available_servers=self.available_servers,
                    inferred_servers=self.inferred_servers,
                    session_id=self.session_id,
                )

                if inference_result["new_servers"]:
                    server_list = ", ".join(inference_result["new_servers"])

                    await self.on_message(
                        AgentMessage(
                            type=AgentMessageType.SYSTEM,
                            data=f"Connecting to {server_list}...",
                        )
                    )

                    await asyncio.sleep(0.1)

                    await self.client.disconnect()

                    mcp_servers = {
                        name: config.model_dump()
                        for name, config in inference_result["selected_servers"].items()
                    }

                    await self._initialize_client(mcp_servers=mcp_servers)

            self.interrupting = False

            # Send query
            await self.client.query(user_input)

            async for message in self.client.receive_response():
@@ -71,6 +136,8 @@ async def start(self) -> None:

    async def _handle_message(self, message: Any) -> None:
        if isinstance(message, SystemMessage):
            log_json(message.data)

            if message.subtype == AgentMessageType.INIT.value and message.data.get(
                "session_id"
            ):