Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
.PHONY: chat dev console
.PHONY: dev console install start

install:
uv sync && uv run pre-commit install && cp .env.example .env && echo "Please edit the .env file with your API keys."

chat:
uv run chat

console:
uv run textual console -x SYSTEM -x EVENT -x DEBUG -x INFO

dev:
uv run textual run --dev -c chat

start:
uv run chat
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ make install
Update the `.env` file with your `ANTHROPIC_API_KEY` and then run:

```bash
make chat
make start

# Alternatively, if in dev (see below)
make dev
Expand Down
4 changes: 2 additions & 2 deletions agent-chat-cli.config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ model: haiku
include_partial_messages: true

# Enable dynamic MCP server inference
mcp_server_inference: true
mcp_server_inference: false

# Named agents with custom configurations
# agents:
Expand Down Expand Up @@ -57,4 +57,4 @@ mcp_servers:
disallowed_tools: []

# Permission mode for tool execution
permission_mode: "bypassPermissions"
permission_mode: "default"
3 changes: 3 additions & 0 deletions src/agent_chat_cli/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
from agent_chat_cli.components.header import Header
from agent_chat_cli.components.chat_history import ChatHistory, MessagePosted
from agent_chat_cli.components.thinking_indicator import ThinkingIndicator
from agent_chat_cli.components.tool_permission_prompt import ToolPermissionPrompt
from agent_chat_cli.components.user_input import UserInput
from agent_chat_cli.system.agent_loop import AgentLoop
from agent_chat_cli.system.message_bus import MessageBus
Expand Down Expand Up @@ -38,12 +39,14 @@ def __init__(self) -> None:
)

self.actions = Actions(self)
self.pending_tool_permission: dict | None = None

def compose(self) -> ComposeResult:
    """Yield the app's widgets inside a single ``VerticalScroll`` container.

    Widgets are yielded in this order: header, chat history, thinking
    indicator, tool-permission prompt, user input. The permission prompt
    and the user input both receive this app's ``Actions`` instance.
    """
    shared_actions = self.actions

    with VerticalScroll():
        yield Header()
        yield ChatHistory()
        yield ThinkingIndicator()
        yield ToolPermissionPrompt(actions=shared_actions)
        yield UserInput(actions=shared_actions)

async def on_mount(self) -> None:
Expand Down
92 changes: 92 additions & 0 deletions src/agent_chat_cli/components/tool_permission_prompt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
from typing import Any, TYPE_CHECKING

from textual.widget import Widget
from textual.app import ComposeResult
from textual.widgets import Label, Input
from textual.reactive import reactive

from agent_chat_cli.components.caret import Caret
from agent_chat_cli.components.flex import Flex
from agent_chat_cli.components.spacer import Spacer
from agent_chat_cli.utils import get_tool_info
from agent_chat_cli.utils.logger import log_json

if TYPE_CHECKING:
from agent_chat_cli.system.actions import Actions


class ToolPermissionPrompt(Widget):
    """Prompt widget that asks the user to allow or deny a tool call.

    The widget renders a label naming the tool, a hint line, and a single
    ``Input``. Submitting the input (empty input counts as ``"yes"``) or
    pressing ESC (submitted as ``"no"``) forwards the answer to
    ``Actions.respond_to_tool_permission``.
    """

    # Toggled from outside; ``watch_is_visible`` mirrors it onto ``display``.
    is_visible = reactive(False)
    # Raw tool identifier; ``watch_tool_name`` renders it into the label.
    tool_name = reactive("")
    # Pass the ``dict`` factory instead of a ``{}`` literal so that every
    # widget instance starts with its own dictionary rather than sharing one
    # mutable default object.
    tool_input: dict[str, Any] = reactive(dict, init=False)  # type: ignore[assignment]

    def __init__(self, actions: "Actions") -> None:
        """Store the ``Actions`` object used to deliver the user's answer.

        Args:
            actions: Object whose ``respond_to_tool_permission`` coroutine is
                awaited when the input is submitted.
        """
        super().__init__()
        self.actions = actions

    def compose(self) -> ComposeResult:
        """Yield the tool label, the hint line, a spacer and the input row."""
        yield Label("", id="tool-display")
        yield Label(" [dim]Allow? (Enter=yes, ESC=no, or ask another question):[/dim]")

        yield Spacer()

        with Flex():
            yield Caret()
            yield Input(placeholder="Yes", id="permission-input")

    def watch_is_visible(self, is_visible: bool) -> None:
        """Show or hide the widget; when shown, clear and focus the input."""
        self.display = is_visible

        if is_visible:
            input_widget = self.query_one("#permission-input", Input)
            # Start every prompt from an empty field so a previous answer
            # is never resubmitted by accident.
            input_widget.value = ""
            input_widget.focus()

    def watch_tool_name(self, tool_name: str) -> None:
        """Re-render the ``#tool-display`` label for a new tool name.

        An empty name is ignored. When ``get_tool_info`` reports a server
        name, it is shown in brackets ahead of the tool name; otherwise the
        raw ``tool_name`` is shown.
        """
        if not tool_name:
            return

        tool_info = get_tool_info(tool_name)
        tool_display_label = self.query_one("#tool-display", Label)

        if tool_info["server_name"]:
            # ``\[`` escapes the bracket so it is rendered literally instead
            # of being parsed as a markup tag.
            tool_display = (
                rf"[bold]Confirm Tool:[/] [cyan]\[{tool_info['server_name']}][/] "
                f"{tool_info['tool_name']}"
            )
        else:
            tool_display = f"[bold]Confirm Tool:[/] {tool_name}"

        tool_display_label.update(tool_display)

    async def on_input_submitted(self, event: Input.Submitted) -> None:
        """Log the submitted text and forward it as the permission answer.

        Whitespace is stripped; an empty result is treated as ``"yes"``.
        """
        raw_value = event.value
        stripped_value = raw_value.strip()
        response = stripped_value or "yes"

        log_json(
            {
                "event": "permission_input_submitted",
                "raw_value": raw_value,
                "stripped_value": stripped_value,
                "final_response": response,
            }
        )

        await self.actions.respond_to_tool_permission(response)

    async def on_input_blurred(self, event: Input.Blurred) -> None:
        """Return focus to the input for as long as the prompt is visible."""
        if self.is_visible:
            input_widget = self.query_one("#permission-input", Input)
            input_widget.focus()

    async def on_key(self, event) -> None:
        """Treat ESC as a "no" answer and submit it through the input."""
        if event.key == "escape":
            log_json({"event": "permission_escape_pressed"})

            # Consume the key so it is not processed any further.
            event.stop()
            event.prevent_default()

            input_widget = self.query_one("#permission-input", Input)
            input_widget.value = "no"

            # Reuse the normal submit path so ``on_input_submitted`` handles
            # logging and delivery of the answer.
            await input_widget.action_submit()
113 changes: 110 additions & 3 deletions src/agent_chat_cli/docs/architecture.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ Textual widgets responsible for UI rendering:
- **Message widgets**: SystemMessage, UserMessage, AgentMessage, ToolMessage
- **UserInput**: Handles user text input and submission
- **ThinkingIndicator**: Shows when agent is processing
- **ToolPermissionPrompt**: Interactive widget for approving/denying tool execution requests

### System Layer

Expand All @@ -26,6 +27,9 @@ Manages the conversation loop with Claude SDK:
- Emits AgentMessageType events (STREAM_EVENT, ASSISTANT, RESULT)
- Manages session persistence via session_id
- Supports dynamic MCP server inference and loading
- Implements `_can_use_tool` callback for interactive tool permission requests
- Uses `permission_lock` (asyncio.Lock) to serialize parallel permission requests
- Manages `permission_response_queue` for user responses to tool permission prompts

#### MCP Server Inference (`system/mcp_inference.py`)
Intelligently determines which MCP servers are needed for each query:
Expand All @@ -42,19 +46,23 @@ Routes agent messages to appropriate UI components:
- Controls thinking indicator state
- Manages scroll-to-bottom behavior
- Displays system messages (e.g., MCP server connection notifications)
- Detects tool permission requests and shows ToolPermissionPrompt
- Manages UI transitions between UserInput and ToolPermissionPrompt

#### Actions (`system/actions.py`)
Centralizes all user-initiated actions and controls:
- **quit()**: Exits the application
- **query(user_input)**: Sends user query to agent loop queue
- **interrupt()**: Stops streaming mid-execution by setting interrupt flag and calling SDK interrupt
- **interrupt()**: Stops streaming mid-execution by setting interrupt flag and calling SDK interrupt (ignores ESC when tool permission prompt is visible)
- **new()**: Starts new conversation by sending NEW_CONVERSATION control command
- **respond_to_tool_permission(response)**: Handles tool permission responses, manages UI state transitions between permission prompt and user input
- Manages UI state (thinking indicator, chat history clearing)
- Directly accesses agent_loop internals (query_queue, client, interrupting flag)
- Directly accesses agent_loop internals (query_queue, client, interrupting flag, permission_response_queue)

Actions are triggered via:
- Keybindings in app.py (ESC → action_interrupt, Ctrl+N → action_new)
- Text commands in user_input.py ("exit", "clear")
- Component events (ToolPermissionPrompt.on_input_submitted → respond_to_tool_permission)

### Utils Layer

Expand Down Expand Up @@ -211,6 +219,103 @@ mcp_servers:
# ... rest of config
```

## Tool Permission System

The application implements interactive tool permission requests that allow users to approve or deny tool execution in real-time.

### Components

#### ToolPermissionPrompt (`components/tool_permission_prompt.py`)
Textual widget that displays permission requests to the user:
- Shows tool name with MCP server info
- Provides input field for user response
- Supports Enter (approve), ESC (deny), or custom text responses

### Permission Flow

```
Tool Execution Request (from Claude SDK)
AgentLoop._can_use_tool (callback with permission_lock acquired)
Emit SYSTEM AgentMessage with tool_permission_request data
MessageBus._handle_system detects permission request
Show ToolPermissionPrompt, hide UserInput
User Response:
- Enter (or "yes") → Approve
- ESC (or "no") → Deny
- Custom text → Send to Claude as alternative instruction
Actions.respond_to_tool_permission(response)
Put response on permission_response_queue
Hide ToolPermissionPrompt, show UserInput
AgentLoop._can_use_tool receives response
Return PermissionResultAllow or PermissionResultDeny
Next tool permission request (if multiple tools called)
```

### Serialization with Permission Lock

When multiple tools request permission in parallel, a `permission_lock` (asyncio.Lock) ensures they are handled sequentially:

1. First tool acquires lock → Shows prompt → Waits for response → Releases lock
2. Second tool acquires lock → Shows prompt → Waits for response → Releases lock
3. Third tool acquires lock → Shows prompt → Waits for response → Releases lock

This prevents race conditions where multiple prompts would overwrite each other and ensures each tool gets a dedicated user response.

### Permission Responses

The `_can_use_tool` callback returns typed permission results:

**Approve (CONFIRM)**:
```python
return PermissionResultAllow(
behavior="allow",
updated_input=tool_input,
)
```

**Deny (DENY)**:
```python
return PermissionResultDeny(
behavior="deny",
message="User denied permission",
interrupt=True,
)
```

**Custom Response**:
```python
return PermissionResultDeny(
behavior="deny",
message=user_response, # Alternative instruction sent to Claude
interrupt=True,
)
```

### ESC Key Handling

When ToolPermissionPrompt is visible, the ESC key is intercepted:
- `Actions.interrupt()` checks `permission_prompt.is_visible`
- If visible, returns early without interrupting the agent
- ToolPermissionPrompt's `on_key` handler processes ESC to deny the tool
- If not visible, ESC performs normal interrupt behavior

### System Messages

Responses other than approval (denials and custom replies) generate system messages in the chat:
- **Denied**: `"Permission denied for {tool_name}"`
- **Custom response**: `"Custom response for {tool_name}: {user_response}"`

## User Commands

### Text Commands
Expand All @@ -219,7 +324,7 @@ mcp_servers:

### Keybindings
- **Ctrl+C**: Quit application
- **ESC**: Interrupt streaming response
- **ESC**: Interrupt streaming response (or deny tool permission if prompt visible)
- **Ctrl+N**: Start new conversation

## Session Management
Expand Down Expand Up @@ -274,3 +379,5 @@ SDK reconnects to previous session with full history
- Control commands are queued alongside user queries to ensure proper task ordering
- Agent loop processes both strings (user queries) and ControlCommands from the same queue
- Interrupt flag is checked on each streaming message to enable immediate stop
- Tool permission requests are serialized via asyncio.Lock to handle parallel tool calls sequentially
- Permission responses use typed SDK objects (PermissionResultAllow, PermissionResultDeny) rather than plain dictionaries
31 changes: 31 additions & 0 deletions src/agent_chat_cli/system/actions.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,11 @@
from textual.widgets import Input

from agent_chat_cli.system.agent_loop import AgentLoop
from agent_chat_cli.utils.enums import ControlCommand
from agent_chat_cli.components.chat_history import ChatHistory
from agent_chat_cli.components.thinking_indicator import ThinkingIndicator
from agent_chat_cli.components.tool_permission_prompt import ToolPermissionPrompt
from agent_chat_cli.utils.logger import log_json


class Actions:
Expand All @@ -16,6 +20,10 @@ async def query(self, user_input: str) -> None:
await self.agent_loop.query_queue.put(user_input)

async def interrupt(self) -> None:
    """Interrupt the agent unless the tool-permission prompt is showing.

    When the prompt is visible this returns without touching the agent
    loop. Otherwise it sets ``agent_loop.interrupting`` and awaits the
    client's ``interrupt()``.
    """
    if self.app.query_one(ToolPermissionPrompt).is_visible:
        return

    loop = self.agent_loop
    loop.interrupting = True
    await loop.client.interrupt()

Expand All @@ -30,3 +38,26 @@ async def new(self) -> None:

thinking_indicator = self.app.query_one(ThinkingIndicator)
thinking_indicator.is_thinking = False

async def respond_to_tool_permission(self, response: str) -> None:
    """Deliver a permission answer to the agent loop and restore the input UI.

    The response is logged, put on ``agent_loop.permission_response_queue``
    unchanged, and then the UI is switched back: thinking indicator on,
    permission prompt hidden, user input displayed and focused.

    Args:
        response: Text submitted from the permission prompt.
    """
    # Function-scope import kept as in the original.
    # NOTE(review): presumably avoids a circular import; confirm.
    from agent_chat_cli.components.user_input import UserInput

    log_json({"event": "permission_response_action", "response": response})

    await self.agent_loop.permission_response_queue.put(response)

    app = self.app
    app.query_one(ThinkingIndicator).is_thinking = True
    app.query_one(ToolPermissionPrompt).is_visible = False

    chat_input = app.query_one(UserInput)
    chat_input.display = True
    chat_input.query_one(Input).focus()
Loading