From b377ec2e72720c1da3adb8718a4766cfc1140a62 Mon Sep 17 00:00:00 2001 From: Test User Date: Sun, 15 Feb 2026 22:00:34 -0700 Subject: [PATCH 1/2] feat: switch default engine to react and update documentation (#355) Switch the default execution engine from "plan" to "react" (ReAct) across CLI, API, and runtime layers. Update all documentation to reflect the new default and mark Phase 2.5 as complete. Closes #355 --- CLAUDE.md | 105 ++++++++++++++++---- codeframe/cli/app.py | 12 +-- codeframe/core/conductor.py | 8 +- codeframe/core/runtime.py | 4 +- codeframe/ui/routers/tasks_v2.py | 10 +- docs/AGENT_V3_UNIFIED_PLAN.md | 6 +- docs/V2_STRATEGIC_ROADMAP.md | 62 +++++++++++- tests/cli/test_v2_cli_integration.py | 6 +- tests/core/test_agent_streaming.py | 22 ++-- tests/core/test_react_engine_integration.py | 53 +++++----- tests/integration/test_tasks_v2_engine.py | 8 +- 11 files changed, 209 insertions(+), 87 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 80dddc15..0671a1d9 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -31,13 +31,14 @@ If you are an agent working in this repo: **do not improvise architecture**. Fol --- -## Current Reality (Phase 1 & 2 Complete) +## Current Reality (Phase 1, 2 & 2.5 Complete) ### What's Working Now -- **Full agent execution**: `cf work start --execute` +- **Full agent execution**: `cf work start --execute` (uses ReAct engine by default) +- **Engine selection**: `--engine react` (default) or `--engine plan` (legacy) - **Verbose mode**: `cf work start --execute --verbose` shows detailed progress - **Dry run mode**: `cf work start --execute --dry-run` -- **Self-correction loop**: Agent automatically fixes failing verification gates (up to 3 attempts) +- **Self-correction loop**: Agent automatically fixes failing verification gates (up to 5 attempts with ReAct) - **FAILED task status**: Tasks can transition to FAILED for proper error visibility - **Tech stack configuration**: `cf init . --detect` auto-detects tech stack from project files - **Project preferences**: Agent loads AGENTS.md or CLAUDE.md for per-project configuration @@ -79,9 +80,12 @@ If you are an agent working in this repo: **do not improvise architecture**. Fol ``` codeframe/ ├── core/ # Headless domain + orchestration (NO FastAPI imports) -│ ├── agent.py # Agent orchestrator with blocker detection -│ ├── planner.py # LLM-powered implementation planning -│ ├── executor.py # Code execution engine with rollback +│ ├── react_agent.py # ReAct agent (default engine) - observe-think-act loop +│ ├── tools.py # Tool definitions for ReAct agent (7 tools) +│ ├── editor.py # Search-replace file editor with fuzzy matching +│ ├── agent.py # Legacy plan-based agent (--engine plan) +│ ├── planner.py # LLM-powered implementation planning (plan engine) +│ ├── executor.py # Code execution engine with rollback (plan engine) │ ├── context.py # Task context loader with relevance scoring │ ├── tasks.py # Task management with depends_on field │ ├── blockers.py # Human-in-the-loop blocker system @@ -200,14 +204,17 @@ At all times: | Component | File | Purpose | |-----------|------|---------| +| **ReactAgent** | **`core/react_agent.py`** | **Default engine: observe-think-act loop with tool use** | +| **Tools** | **`core/tools.py`** | **7 agent tools: read/edit/create file, run command/tests, search, list** | +| **Editor** | **`core/editor.py`** | **Search-replace editor with 4-level fuzzy matching** | | LLM Adapter | `adapters/llm/base.py` | Protocol, ModelSelector, Purpose enum | | Anthropic Provider | `adapters/llm/anthropic.py` | Claude integration with streaming | | Mock Provider | `adapters/llm/mock.py` | Testing with call tracking | | Context Loader | `core/context.py` | Codebase scanning, relevance scoring | -| Planner | `core/planner.py` | Task → ImplementationPlan via LLM | -| Executor | `core/executor.py` | File ops, shell commands, rollback | -| Agent | `core/agent.py` | Orchestration loop, blocker detection | -| Runtime | `core/runtime.py` | Run lifecycle, agent invocation | +| Planner | `core/planner.py` | Task → ImplementationPlan via LLM (plan engine) | +| Executor | `core/executor.py` | File ops, shell commands, rollback (plan engine) | +| Agent (legacy) | `core/agent.py` | Plan-based orchestration (--engine plan) | +| Runtime | `core/runtime.py` | Run lifecycle, engine selection, agent invocation | | Conductor | `core/conductor.py` | Batch orchestration, worker pool | | Dependency Graph | `core/dependency_graph.py` | DAG operations, topological sort | | Dependency Analyzer | `core/dependency_analyzer.py` | LLM-based dependency inference | @@ -228,13 +235,50 @@ Task-based heuristic via `Purpose` enum: Future: `cf tasks set provider ` for per-task override. -### Execution Flow +### Engine Selection + +CodeFRAME supports two execution engines, selected via `--engine`: + +| Engine | Flag | Pattern | Best For | +|--------|------|---------|----------| +| **ReAct** (default) | `--engine react` | Observe → Think → Act loop | Most tasks, adaptive execution | +| **Plan** (legacy) | `--engine plan` | Plan all steps → Execute sequentially | Well-defined, predictable tasks | + +### Execution Flow (ReAct — default) ``` cf work start --execute [--verbose] │ ├── runtime.start_task_run() # Creates run, transitions task→IN_PROGRESS │ - └── runtime.execute_agent(verbose=True/False) + └── runtime.execute_agent(engine="react") + │ + └── ReactAgent.run(task_id) + ├── Load context (PRD, codebase, blockers, AGENTS.md, tech_stack) + ├── Build layered system prompt + │ + └── Tool-use loop (until complete/blocked/failed): + ├── LLM decides next action (tool call) + ├── Execute tool: read_file, edit_file, create_file, + │ run_command, run_tests, search_codebase, list_files + ├── Observe result → feed back to LLM + ├── Incremental verification (ruff after file changes) + └── Token budget management (3-tier compaction) + │ + └── Final verification with self-correction (up to 5 retries) + │ + └── Update run/task status based on agent result + ├── COMPLETED → complete_run() → task→DONE + ├── BLOCKED → block_run() → task→BLOCKED + └── FAILED → fail_run() → task→FAILED +``` + +### Execution Flow (Plan — legacy, `--engine plan`) +``` +cf work start --execute --engine plan + │ + ├── runtime.start_task_run() + │ + └── runtime.execute_agent(engine="plan") │ ├── agent.run(task_id) │ ├── Load context (PRD, codebase, blockers, AGENTS.md) @@ -289,7 +333,8 @@ cf tasks show # Work execution (single task) cf work start # Creates run record -cf work start --execute # Runs AI agent +cf work start --execute # Runs AI agent (ReAct engine, default) +cf work start --execute --engine plan # Use legacy plan engine cf work start --execute --verbose # With detailed output cf work start --execute --dry-run # Preview changes cf work stop # Cancel stale run @@ -298,13 +343,14 @@ cf work follow # Stream real-time output cf work follow --tail 50 # Show last 50 lines then stream # Batch execution (multiple tasks) -cf work batch run ... # Execute multiple tasks +cf work batch run ... # Execute multiple tasks (ReAct default) cf work batch run --all-ready # All READY tasks +cf work batch run --all-ready --engine plan # Use legacy plan engine cf work batch run --strategy serial # Serial (default) cf work batch run --strategy parallel # Parallel execution cf work batch run --strategy auto # LLM-inferred dependencies cf work batch run --max-parallel 4 # Concurrent limit -cf work batch run --retry 3 # Auto-retry failures +cf work batch run --retry 3 # Auto-retry failures cf work batch status [batch_id] # Show batch status cf work batch cancel # Cancel running batch cf work batch resume # Re-run failed tasks @@ -360,6 +406,11 @@ Do not expand frontend scope during Golden Path work. - `docs/AGENT_IMPLEMENTATION_TASKS.md` - Agent system components - `docs/V2_STRATEGIC_ROADMAP.md` - 5-phase plan from CLI to multi-agent +### Agent Architecture (Phase 2.5) +- `docs/AGENT_V3_UNIFIED_PLAN.md` - ReAct architecture design and rules +- `docs/REACT_AGENT_ARCHITECTURE.md` - Deep-dive: tools, editor, token management +- `docs/REACT_AGENT_ANALYSIS.md` - Golden path test run analysis + ### API Documentation (Phase 2) - `/docs` - Swagger UI (interactive API explorer) - `/redoc` - ReDoc (readable API documentation) @@ -406,19 +457,37 @@ If you are unsure which direction to take, default to: --- -## Recent Updates (2026-02-03) +## Recent Updates (2026-02-15) -### Phase 2 Complete: Server Layer -All Phase 2 deliverables are complete: +### Phase 2.5 Complete: ReAct Agent Architecture (#355) +Default execution engine switched from plan-based to **ReAct (Reasoning + Acting)**. + +**What changed:** +- Default engine is now `"react"` — all `cf work start --execute` and `cf work batch run` commands use ReactAgent +- Legacy plan engine available via `--engine plan` flag +- ReactAgent uses iterative tool-use loop (observe → think → act) instead of plan-all-then-execute +- 7 structured tools: `read_file`, `edit_file`, `create_file`, `run_command`, `run_tests`, `search_codebase`, `list_files` +- Search-replace editing with 4-level fuzzy matching (exact → whitespace-normalized → indentation-agnostic → fuzzy) +- Token budget management with 3-tier compaction +- Adaptive iteration budget based on task complexity + +**Phase 2.5 deliverables:** +- ✅ ReAct agent implementation (`core/react_agent.py`, `core/tools.py`, `core/editor.py`) +- ✅ CLI `--engine` flag (#353) +- ✅ API engine parameter (#354) +- ✅ Default switch to react + documentation (#355) | Phase | Focus | Status | |-------|-------|--------| | 1 | CLI Completion | ✅ **Complete** | | 2 | Server Layer | ✅ **Complete** | +| 2.5 | ReAct Agent | ✅ **Complete** | | 3 | Web UI Rebuild | Planned | | 4 | Multi-Agent Coordination | Planned | | 5 | Advanced Features | Planned | +### Phase 2 Complete: Server Layer (2026-02-03) + **Phase 2 deliverables completed:** - ✅ Server audit and refactor (#322) - 15 v2 routers following thin adapter pattern - ✅ API key authentication (#326) - Scopes: read/write/admin diff --git a/codeframe/cli/app.py b/codeframe/cli/app.py index 5bc10ade..9898b8e1 100644 --- a/codeframe/cli/app.py +++ b/codeframe/cli/app.py @@ -1994,9 +1994,9 @@ def work_start( help="Run stub agent (for testing, does nothing real)", ), engine: str = typer.Option( - "plan", + "react", "--engine", - help="Agent engine: 'plan' (default, step-based) or 'react' (ReAct tool-use loop)", + help="Agent engine: 'react' (default, ReAct tool-use loop) or 'plan' (legacy step-based)", ), ) -> None: """Start working on a task. @@ -2007,7 +2007,7 @@ def work_start( Example: codeframe work start abc123 codeframe work start abc123 --execute - codeframe work start abc123 --execute --engine react + codeframe work start abc123 --execute --engine plan codeframe work start abc123 --execute --dry-run codeframe work start abc123 --execute --verbose """ @@ -2860,9 +2860,9 @@ def batch_run( help="Run verification gates (pytest, ruff) after successful batch completion", ), engine: str = typer.Option( - "plan", + "react", "--engine", - help="Agent engine: 'plan' (default, step-based) or 'react' (ReAct tool-use loop)", + help="Agent engine: 'react' (default, ReAct tool-use loop) or 'plan' (legacy step-based)", ), ) -> None: """Execute multiple tasks in batch. @@ -2876,7 +2876,7 @@ def batch_run( codeframe work batch run task1 task2 task3 codeframe work batch run --all-ready codeframe work batch run --all-ready --strategy serial - codeframe work batch run --all-ready --engine react + codeframe work batch run --all-ready --engine plan codeframe work batch run task1 task2 --dry-run codeframe work batch run task1 task2 --retry 2 """ diff --git a/codeframe/core/conductor.py b/codeframe/core/conductor.py index b6e1ff5a..6af50631 100644 --- a/codeframe/core/conductor.py +++ b/codeframe/core/conductor.py @@ -456,7 +456,7 @@ class BatchRun: started_at: datetime completed_at: Optional[datetime] results: dict[str, str] = field(default_factory=dict) - engine: str = "plan" + engine: str = "react" def start_batch( @@ -468,7 +468,7 @@ def start_batch( dry_run: bool = False, max_retries: int = 0, on_event: Optional[Callable[[str, dict], None]] = None, - engine: str = "plan", + engine: str = "react", ) -> BatchRun: """Start a batch execution of multiple tasks. @@ -481,7 +481,7 @@ def start_batch( dry_run: If True, don't actually execute tasks max_retries: Max retry attempts for failed tasks (0 = no retries) on_event: Optional callback for batch events - engine: Agent engine to use ("plan" or "react") + engine: Agent engine to use ("react" default, or "plan" for legacy) Returns: BatchRun with results populated @@ -1699,7 +1699,7 @@ def _execute_task_subprocess( workspace: Workspace, task_id: str, batch_id: Optional[str] = None, - engine: str = "plan", + engine: str = "react", ) -> str: """Execute a single task via subprocess. diff --git a/codeframe/core/runtime.py b/codeframe/core/runtime.py index 17aa2848..4abb301d 100644 --- a/codeframe/core/runtime.py +++ b/codeframe/core/runtime.py @@ -596,7 +596,7 @@ def execute_agent( verbose: bool = False, fix_coordinator: Optional["GlobalFixCoordinator"] = None, event_publisher: Optional["EventPublisher"] = None, - engine: str = "plan", + engine: str = "react", ) -> "AgentState": """Execute a task using the agent orchestrator. @@ -611,7 +611,7 @@ def execute_agent( verbose: If True, print detailed progress to stdout fix_coordinator: Optional coordinator for global fixes (for parallel execution) event_publisher: Optional EventPublisher for SSE streaming (real-time events) - engine: Agent engine to use ("plan" for existing Agent, "react" for ReactAgent) + engine: Agent engine to use ("react" for ReactAgent (default), "plan" for legacy Agent) Returns: Final AgentState after execution diff --git a/codeframe/ui/routers/tasks_v2.py b/codeframe/ui/routers/tasks_v2.py index 875edbba..da20f243 100644 --- a/codeframe/ui/routers/tasks_v2.py +++ b/codeframe/ui/routers/tasks_v2.py @@ -52,8 +52,8 @@ class ApproveTasksRequest(BaseModel): description="Whether to start batch execution after approval", ) engine: str = Field( - "plan", - description="Execution engine: 'plan' (default) or 'react' (ReAct loop)", + "react", + description="Execution engine: 'react' (default, ReAct loop) or 'plan' (legacy step-based)", ) @model_validator(mode="after") @@ -109,8 +109,8 @@ class StartExecutionRequest(BaseModel): description="Number of retries for failed tasks", ) engine: str = Field( - "plan", - description="Execution engine: 'plan' (default) or 'react' (ReAct loop)", + "react", + description="Execution engine: 'react' (default, ReAct loop) or 'plan' (legacy step-based)", ) @model_validator(mode="after") @@ -586,7 +586,7 @@ async def start_single_task( execute: bool = Query(False, description="Run agent execution (requires ANTHROPIC_API_KEY)"), dry_run: bool = Query(False, description="Preview changes without making them"), verbose: bool = Query(False, description="Show detailed progress output"), - engine: Literal["plan", "react"] = Query("plan", description="Execution engine: 'plan' (default) or 'react' (ReAct loop)"), + engine: Literal["plan", "react"] = Query("react", description="Execution engine: 'react' (default, ReAct loop) or 'plan' (legacy step-based)"), workspace: Workspace = Depends(get_v2_workspace), ) -> dict[str, Any]: """Start a single task run. diff --git a/docs/AGENT_V3_UNIFIED_PLAN.md b/docs/AGENT_V3_UNIFIED_PLAN.md index b4167684..065ae4cf 100644 --- a/docs/AGENT_V3_UNIFIED_PLAN.md +++ b/docs/AGENT_V3_UNIFIED_PLAN.md @@ -1,7 +1,7 @@ # Agent V3: Unified Architectural Plan **Date**: 2026-02-07 -**Status**: Final Draft — Synthesized from research team debate +**Status**: ✅ Implemented — Default engine since 2026-02-15 (#355) **Sources**: AGENT_ARCHITECTURE_RESEARCH.md, AGENT_FRAMEWORK_DEEP_DIVE.md, AGENT_ARCHITECTURE_CRITIQUE.md, REACT_AGENT_ARCHITECTURE.md --- @@ -27,7 +27,7 @@ This plan redesigns CodeFRAME's agent execution from Plan-and-Execute to a **Hyb 3. **Lint after every file change** — catch errors immediately, not after 92 accumulate 4. **Model is the planner** — the LLM decides what to do next based on observed reality 5. **Fewer tools = higher accuracy** — 7 focused tools, not a large surface area -6. **Backward compatible** — `--engine plan` preserved as default until ReAct is validated +6. **Backward compatible** — `--engine plan` available as fallback (ReAct is now default) --- @@ -39,7 +39,7 @@ This plan redesigns CodeFRAME's agent execution from Plan-and-Execute to a **Hyb cf work start --execute [--engine react] │ ├── runtime.start_task_run() - │ └── Select engine: "plan" (default, existing) or "react" (new) + │ └── Select engine: "react" (default) or "plan" (legacy) │ └── runtime.execute_agent(engine="react") │ diff --git a/docs/V2_STRATEGIC_ROADMAP.md b/docs/V2_STRATEGIC_ROADMAP.md index 303f4254..ce46ec81 100644 --- a/docs/V2_STRATEGIC_ROADMAP.md +++ b/docs/V2_STRATEGIC_ROADMAP.md @@ -1,8 +1,8 @@ # CodeFRAME v2 Strategic Roadmap **Created**: 2026-01-29 -**Updated**: 2026-02-03 -**Status**: Active - Phase 2 In Progress +**Updated**: 2026-02-15 +**Status**: Active - Phase 2.5 Complete, Phase 3 Next ## Executive Summary @@ -164,6 +164,59 @@ See `docs/PHASE_2_DEVELOPER_GUIDE.md` for implementation guide. --- +## Phase 2.5: ReAct Agent Architecture ✅ COMPLETE + +**Goal**: Replace plan-then-execute agent with iterative ReAct (Reasoning + Acting) loop as the default engine. +**Status**: ✅ **COMPLETE** (2026-02-15) + +### Motivation + +The plan-based agent had several failure modes discovered during testing: +- Config file overwrites (whole-file generation ignores existing content) +- Cross-file naming inconsistency (each file generated in isolation) +- Accumulated lint errors (no incremental verification) +- Ineffective self-correction (empty error context) + +### Deliverables + +1. **ReAct Agent Implementation** - ✅ COMPLETE + - `core/react_agent.py` - Observe-Think-Act loop with tool use + - `core/tools.py` - 7 structured tools (read/edit/create file, run command/tests, search, list) + - `core/editor.py` - Search-replace editor with 4-level fuzzy matching + +2. **Engine Selection** - ✅ COMPLETE + - `--engine react` (default) or `--engine plan` (legacy) on all work commands + - Runtime routes to ReactAgent or Agent based on engine parameter + - API endpoints support engine parameter with validation + +3. **CLI Validation** (#353) - ✅ COMPLETE + - `--engine` flag on `cf work start` and `cf work batch run` + - Default switched to "react" + +4. **API Validation** (#354) - ✅ COMPLETE + - Engine parameter on execute, approve, and stream endpoints + - Backward compatible — omitting engine uses "react" default + +5. **Default Switch + Documentation** (#355) - ✅ COMPLETE + - Default engine changed from "plan" to "react" across CLI, API, and runtime + - CLAUDE.md updated with ReAct architecture documentation + +### Key Architecture Decisions + +- **Search-replace editing**: ~98% accuracy vs ~70-80% for whole-file regeneration +- **Read before write**: Agent always sees actual file state before editing +- **Lint after every change**: Catch errors immediately, not after they accumulate +- **7 focused tools**: Fewer tools = higher accuracy +- **Token budget management**: 3-tier compaction prevents context window overflow +- **Adaptive iteration budget**: Task complexity scoring adjusts iteration limits + +### Reference Documentation +- `docs/AGENT_V3_UNIFIED_PLAN.md` - Architecture design and rules +- `docs/REACT_AGENT_ARCHITECTURE.md` - Deep-dive on tools, editor, token management +- `docs/PHASE_25_VALIDATION_REPORT.md` - End-to-end validation results + +--- + ## Phase 3: Web UI Rebuild **Goal**: Modern dashboard consuming REST/WebSocket API. @@ -317,9 +370,10 @@ After each phase: | Phase | Focus | Key Outcome | Status | |-------|-------|-------------|--------| | 1 | CLI Completion | Production-ready headless agent | ✅ **COMPLETE** | -| 2 | Server Layer | REST API + real-time events | 🔄 **90% COMPLETE** | +| 2 | Server Layer | REST API + real-time events | ✅ **COMPLETE** | +| 2.5 | ReAct Agent | Iterative tool-use execution engine | ✅ **COMPLETE** | | 3 | Web UI | Modern dashboard | Planned | | 4 | Multi-Agent | Agent swarms | Planned | | 5 | Advanced | Power features | Planned | -**Current focus**: Phase 2 - Completing remaining items (WebSocket events, OpenAPI docs, pagination). +**Current focus**: Phase 3 - Web UI rebuild on v2 foundation. diff --git a/tests/cli/test_v2_cli_integration.py b/tests/cli/test_v2_cli_integration.py index b2cce076..a59e7fc8 100644 --- a/tests/cli/test_v2_cli_integration.py +++ b/tests/cli/test_v2_cli_integration.py @@ -873,6 +873,7 @@ def test_execute_dry_run(self, workspace_with_ready_tasks, mock_llm): [ "work", "start", tid, "--execute", "--dry-run", + "--engine", "plan", "-w", str(workspace_with_ready_tasks), ], ) @@ -882,7 +883,7 @@ def test_execute_dry_run(self, workspace_with_ready_tasks, mock_llm): assert provider.call_count >= 1 def test_execute_creates_file(self, workspace_with_ready_tasks, mock_llm): - """work start --execute runs agent that creates a file via MockProvider.""" + """work start --execute --engine plan runs agent that creates a file via MockProvider.""" # Plan says create hello.py, executor generates content via LLM provider = mock_llm([MOCK_PLAN_RESPONSE, MOCK_FILE_CONTENT]) @@ -896,6 +897,7 @@ def test_execute_creates_file(self, workspace_with_ready_tasks, mock_llm): [ "work", "start", tid, "--execute", + "--engine", "plan", "-w", str(workspace_with_ready_tasks), ], ) @@ -949,7 +951,7 @@ def test_ai_golden_path(self, temp_repo, mock_llm): assert len(task_list) > 0 tid = task_list[0].id[:8] - r = runner.invoke(app, ["work", "start", tid, "--execute", "-w", wp]) + r = runner.invoke(app, ["work", "start", tid, "--execute", "--engine", "plan", "-w", wp]) assert r.exit_code == 0, f"work start --execute: {r.output}" # Verify LLM was exercised through the full path diff --git a/tests/core/test_agent_streaming.py b/tests/core/test_agent_streaming.py index e468fcb7..2d82b75d 100644 --- a/tests/core/test_agent_streaming.py +++ b/tests/core/test_agent_streaming.py @@ -151,21 +151,19 @@ def test_execute_agent_creates_output_logger(self, temp_workspace: Workspace): """Runtime execute_agent should create an output logger for the run.""" from codeframe.core import runtime, tasks as tasks_module from codeframe.core.streaming import run_output_exists + from codeframe.core.agent import AgentStatus # Create task and run task = tasks_module.create(temp_workspace, title="Test task") run = runtime.start_task_run(temp_workspace, task.id) - # Mock the Agent class at its definition location - with patch("codeframe.core.agent.Agent") as MockAgent, \ + # Mock the ReactAgent class (default engine is now "react") + with patch("codeframe.core.react_agent.ReactAgent") as MockReact, \ patch("codeframe.adapters.llm.get_provider"): mock_agent = MagicMock() - mock_agent.run.return_value = MagicMock( - status=MagicMock(value="completed"), - blocker=None, - ) - MockAgent.return_value = mock_agent + mock_agent.run.return_value = AgentStatus.COMPLETED + MockReact.return_value = mock_agent # Patch os.getenv to provide API key with patch.dict("os.environ", {"ANTHROPIC_API_KEY": "test-key"}): @@ -178,9 +176,10 @@ def test_execute_agent_creates_output_logger(self, temp_workspace: Workspace): assert run_output_exists(temp_workspace, run.id) def test_output_logger_passed_to_agent(self, temp_workspace: Workspace): - """Runtime should pass the output logger to the Agent.""" + """Runtime should pass the output logger to the ReactAgent (default engine).""" from codeframe.core import runtime, tasks as tasks_module from codeframe.core.streaming import RunOutputLogger + from codeframe.core.agent import AgentStatus task = tasks_module.create(temp_workspace, title="Test task") run = runtime.start_task_run(temp_workspace, task.id) @@ -191,13 +190,10 @@ def capture_agent(*args, **kwargs): nonlocal captured_logger captured_logger = kwargs.get("output_logger") mock = MagicMock() - mock.run.return_value = MagicMock( - status=MagicMock(value="completed"), - blocker=None, - ) + mock.run.return_value = AgentStatus.COMPLETED return mock - with patch("codeframe.core.agent.Agent", side_effect=capture_agent), \ + with patch("codeframe.core.react_agent.ReactAgent", side_effect=capture_agent), \ patch("codeframe.adapters.llm.get_provider"), \ patch.dict("os.environ", {"ANTHROPIC_API_KEY": "test-key"}): try: diff --git a/tests/core/test_react_engine_integration.py b/tests/core/test_react_engine_integration.py index 4a706119..7e59e323 100644 --- a/tests/core/test_react_engine_integration.py +++ b/tests/core/test_react_engine_integration.py @@ -65,24 +65,24 @@ class TestRuntimeEngineSelection: @patch.dict("os.environ", {"ANTHROPIC_API_KEY": "test-key"}) @patch("codeframe.core.streaming.RunOutputLogger") @patch("codeframe.adapters.llm.get_provider") - @patch("codeframe.core.agent.Agent") - def test_default_engine_uses_plan_agent( - self, mock_agent_cls, mock_get_provider, mock_output_logger, temp_workspace + @patch("codeframe.core.react_agent.ReactAgent") + def test_default_engine_uses_react_agent( + self, mock_react_cls, mock_get_provider, mock_output_logger, temp_workspace ): - """Default engine ('plan') should use the existing Agent class.""" + """Default engine ('react') should use the ReactAgent class.""" from codeframe.core.runtime import execute_agent, start_task_run task = tasks.create(temp_workspace, title="Test", status=TaskStatus.READY) run = start_task_run(temp_workspace, task.id) - # Mock agent + # Mock agent — ReactAgent.run() returns AgentStatus, not AgentState mock_agent = MagicMock() - mock_agent.run.return_value = AgentState(status=AgentStatus.COMPLETED) - mock_agent_cls.return_value = mock_agent + mock_agent.run.return_value = AgentStatus.COMPLETED + mock_react_cls.return_value = mock_agent state = execute_agent(temp_workspace, run) - mock_agent_cls.assert_called_once() + mock_react_cls.assert_called_once() assert state.status == AgentStatus.COMPLETED @patch.dict("os.environ", {"ANTHROPIC_API_KEY": "test-key"}) @@ -223,7 +223,7 @@ class TestBatchRunEngineField: """Tests for engine field on BatchRun dataclass.""" def test_batch_run_default_engine(self): - """BatchRun should default to engine='plan'.""" + """BatchRun should default to engine='react'.""" batch = BatchRun( id="test-batch", workspace_id="ws-1", @@ -235,7 +235,7 @@ def test_batch_run_default_engine(self): started_at=datetime.now(timezone.utc), completed_at=None, ) - assert batch.engine == "plan" + assert batch.engine == "react" def test_batch_run_react_engine(self): """BatchRun should accept engine='react'.""" @@ -281,7 +281,7 @@ def test_save_and_load_batch_with_engine(self, temp_workspace): assert loaded.engine == "react" def test_save_and_load_batch_default_engine(self, temp_workspace): - """Default engine ('plan') should persist correctly.""" + """Default engine ('react') should persist correctly.""" from codeframe.core.conductor import get_batch batch = BatchRun( @@ -300,7 +300,7 @@ def test_save_and_load_batch_default_engine(self, temp_workspace): loaded = get_batch(temp_workspace, "test-engine-default") assert loaded is not None - assert loaded.engine == "plan" + assert loaded.engine == "react" class TestSubprocessCommandConstruction: @@ -330,8 +330,8 @@ def test_subprocess_includes_engine_flag(self, mock_popen, temp_workspace): assert "react" in cmd @patch("codeframe.core.conductor.subprocess.Popen") - def test_subprocess_default_engine_is_plan(self, mock_popen, temp_workspace): - """Default engine should be 'plan' in subprocess command.""" + def test_subprocess_default_engine_is_react(self, mock_popen, temp_workspace): + """Default engine should be 'react' in subprocess command.""" from codeframe.core.conductor import _execute_task_subprocess from codeframe.core.runtime import RunStatus @@ -348,7 +348,7 @@ def test_subprocess_default_engine_is_plan(self, mock_popen, temp_workspace): cmd = mock_popen.call_args[0][0] assert "--engine" in cmd - assert "plan" in cmd + assert "react" in cmd class TestStartBatchEngineParam: @@ -380,7 +380,7 @@ def test_start_batch_passes_engine_to_subprocess( @patch("codeframe.core.conductor._execute_task_subprocess") def test_start_batch_default_engine(self, mock_subprocess, workspace_with_tasks): - """start_batch without engine param should default to 'plan'.""" + """start_batch without engine param should default to 'react'.""" workspace, task_list = workspace_with_tasks task_ids = [t.id for t in task_list] @@ -392,7 +392,7 @@ def test_start_batch_default_engine(self, mock_subprocess, workspace_with_tasks) strategy="serial", ) - assert batch.engine == "plan" + assert batch.engine == "react" class TestBackwardCompatibility: @@ -401,28 +401,29 @@ class TestBackwardCompatibility: @patch.dict("os.environ", {"ANTHROPIC_API_KEY": "test-key"}) @patch("codeframe.core.streaming.RunOutputLogger") @patch("codeframe.adapters.llm.get_provider") - @patch("codeframe.core.agent.Agent") + @patch("codeframe.core.react_agent.ReactAgent") def test_execute_agent_without_engine_param( - self, mock_agent_cls, mock_get_provider, mock_output_logger, temp_workspace + self, mock_react_cls, mock_get_provider, mock_output_logger, temp_workspace ): - """Calling execute_agent without engine should work (backward compatible).""" + """Calling execute_agent without engine should use ReactAgent (default).""" from codeframe.core.runtime import execute_agent, start_task_run task = tasks.create(temp_workspace, title="Test", status=TaskStatus.READY) run = start_task_run(temp_workspace, task.id) + # ReactAgent.run() returns AgentStatus, not AgentState mock_agent = MagicMock() - mock_agent.run.return_value = AgentState(status=AgentStatus.COMPLETED) - mock_agent_cls.return_value = mock_agent + mock_agent.run.return_value = AgentStatus.COMPLETED + mock_react_cls.return_value = mock_agent - # Call without engine parameter - should still work + # Call without engine parameter - should use ReactAgent (default) state = execute_agent(temp_workspace, run) assert state.status == AgentStatus.COMPLETED - mock_agent_cls.assert_called_once() + mock_react_cls.assert_called_once() def test_batch_run_without_engine_field(self, temp_workspace): - """BatchRun created without engine should default to 'plan'.""" + """BatchRun created without engine should default to 'react'.""" batch = BatchRun( id="compat-test", workspace_id=temp_workspace.id, @@ -434,4 +435,4 @@ def test_batch_run_without_engine_field(self, temp_workspace): started_at=datetime.now(timezone.utc), completed_at=None, ) - assert batch.engine == "plan" + assert batch.engine == "react" diff --git a/tests/integration/test_tasks_v2_engine.py b/tests/integration/test_tasks_v2_engine.py index 01389702..deaac5f7 100644 --- a/tests/integration/test_tasks_v2_engine.py +++ b/tests/integration/test_tasks_v2_engine.py @@ -52,7 +52,7 @@ def _make_task(workspace, title="Test task", status=TaskStatus.READY): return task -def _make_batch_run(workspace_id, task_ids, engine="plan"): +def _make_batch_run(workspace_id, task_ids, engine="react"): """Build a BatchRun stub for mock return values.""" return BatchRun( id=str(uuid.uuid4()), @@ -108,7 +108,7 @@ class TestExecuteEndpointEngine: """Tests for POST /api/v2/tasks/execute engine parameter.""" def test_execute_default_engine(self, tmp_path, client): - """Default engine should be 'plan' when not specified.""" + """Default engine should be 'react' when not specified.""" ws = _make_workspace(tmp_path) task = _make_task(ws) @@ -128,7 +128,7 @@ def test_execute_default_engine(self, tmp_path, client): assert data["success"] is True mock_batch.assert_called_once() _, kwargs = mock_batch.call_args - assert kwargs["engine"] == "plan" + assert kwargs["engine"] == "react" def test_execute_with_react_engine(self, tmp_path, client): """Passing engine='react' should forward it to conductor.""" @@ -195,7 +195,7 @@ class TestStartSingleTaskEngine: """Tests for POST /api/v2/tasks/{task_id}/start engine parameter.""" def test_start_single_default_engine(self, tmp_path, client): - """Default engine should be 'plan' when query param not provided.""" + """Default engine should be 'react' when query param not provided.""" ws = _make_workspace(tmp_path) task = _make_task(ws) run = _make_run(ws.id, task.id) From 373b419e15bc9e5fd06f5ebb304cdf8adda209c2 Mon Sep 17 00:00:00 2001 From: Test User Date: Sun, 15 Feb 2026 22:14:33 -0700 Subject: [PATCH 2/2] fix: address PR review feedback for #355 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Fix engine_mode label to hide default "react" instead of "plan" - Add language annotations to execution flow code blocks in CLAUDE.md - Update CLAUDE.md last-updated date to 2026-02-15 - Reconcile Phase 2 status in V2_STRATEGIC_ROADMAP.md (now ✅ COMPLETE) - Update test assertion to match new default engine label behavior --- CLAUDE.md | 8 ++++---- codeframe/cli/app.py | 2 +- docs/V2_STRATEGIC_ROADMAP.md | 2 +- tests/cli/test_v2_cli_integration.py | 3 ++- 4 files changed, 8 insertions(+), 7 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index 0671a1d9..ca3d21ac 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,10 +1,10 @@ # CodeFRAME Development Guidelines (v2 Reset) -Last updated: 2026-02-03 +Last updated: 2026-02-15 This repo is in an **in-place v2 refactor** ("strangler rewrite"). The goal is to deliver a **headless, CLI-first Golden Path** and treat all UI/server layers as optional adapters. -**Status: Phase 1 Complete ✅ | Phase 2 Complete ✅** - Server layer with full REST API, authentication, rate limiting, and real-time streaming. See `docs/V2_STRATEGIC_ROADMAP.md` for the 5-phase plan. +**Status: Phase 1 ✅ | Phase 2 ✅ | Phase 2.5 ✅** - ReAct agent is default engine. Server layer with full REST API, authentication, rate limiting, and real-time streaming. See `docs/V2_STRATEGIC_ROADMAP.md` for the 5-phase plan. If you are an agent working in this repo: **do not improvise architecture**. Follow the documents listed below. @@ -245,7 +245,7 @@ CodeFRAME supports two execution engines, selected via `--engine`: | **Plan** (legacy) | `--engine plan` | Plan all steps → Execute sequentially | Well-defined, predictable tasks | ### Execution Flow (ReAct — default) -``` +```text cf work start --execute [--verbose] │ ├── runtime.start_task_run() # Creates run, transitions task→IN_PROGRESS @@ -273,7 +273,7 @@ cf work start --execute [--verbose] ``` ### Execution Flow (Plan — legacy, `--engine plan`) -``` +```text cf work start --execute --engine plan │ ├── runtime.start_task_run() diff --git a/codeframe/cli/app.py b/codeframe/cli/app.py index 9898b8e1..85c8404c 100644 --- a/codeframe/cli/app.py +++ b/codeframe/cli/app.py @@ -2056,7 +2056,7 @@ def work_start( mode = "[dim](dry run)[/dim]" if dry_run else "" debug_mode = " [dim](debug logging enabled)[/dim]" if debug else "" verbose_mode = " [dim](verbose)[/dim]" if verbose else "" - engine_mode = f" [dim](engine={engine})[/dim]" if engine != "plan" else "" + engine_mode = f" [dim](engine={engine})[/dim]" if engine != "react" else "" console.print(f"\n[bold]Executing agent...{mode}{debug_mode}{verbose_mode}{engine_mode}[/bold]") try: diff --git a/docs/V2_STRATEGIC_ROADMAP.md b/docs/V2_STRATEGIC_ROADMAP.md index ce46ec81..863533af 100644 --- a/docs/V2_STRATEGIC_ROADMAP.md +++ b/docs/V2_STRATEGIC_ROADMAP.md @@ -79,7 +79,7 @@ CodeFRAME v2 CLI **Phase 1 is complete** with a production-ready foundation. The ## Phase 2: Server Layer as Thin Adapter **Goal**: FastAPI server exposing core functionality via REST + real-time events. -**Status**: 🔄 **90% COMPLETE** - Core routes done, finishing real-time events + docs +**Status**: ✅ **COMPLETE** ### Deliverables diff --git a/tests/cli/test_v2_cli_integration.py b/tests/cli/test_v2_cli_integration.py index a59e7fc8..ef827643 100644 --- a/tests/cli/test_v2_cli_integration.py +++ b/tests/cli/test_v2_cli_integration.py @@ -995,7 +995,8 @@ def test_react_verbose_mode(self, workspace_with_ready_tasks, mock_llm): ], ) assert result.exit_code == 0, f"react verbose failed: {result.output}" - assert "engine=react" in result.output + # engine=react is the default, so the label should be hidden + assert "engine=react" not in result.output assert "[ReactAgent]" in result.output assert provider.call_count >= 1