diff --git a/CLAUDE.md b/CLAUDE.md index 80dddc15..ca3d21ac 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -1,10 +1,10 @@ # CodeFRAME Development Guidelines (v2 Reset) -Last updated: 2026-02-03 +Last updated: 2026-02-15 This repo is in an **in-place v2 refactor** ("strangler rewrite"). The goal is to deliver a **headless, CLI-first Golden Path** and treat all UI/server layers as optional adapters. -**Status: Phase 1 Complete ✅ | Phase 2 Complete ✅** - Server layer with full REST API, authentication, rate limiting, and real-time streaming. See `docs/V2_STRATEGIC_ROADMAP.md` for the 5-phase plan. +**Status: Phase 1 ✅ | Phase 2 ✅ | Phase 2.5 ✅** - ReAct agent is default engine. Server layer with full REST API, authentication, rate limiting, and real-time streaming. See `docs/V2_STRATEGIC_ROADMAP.md` for the 5-phase plan. If you are an agent working in this repo: **do not improvise architecture**. Follow the documents listed below. @@ -31,13 +31,14 @@ If you are an agent working in this repo: **do not improvise architecture**. Fol --- -## Current Reality (Phase 1 & 2 Complete) +## Current Reality (Phase 1, 2 & 2.5 Complete) ### What's Working Now -- **Full agent execution**: `cf work start --execute` +- **Full agent execution**: `cf work start --execute` (uses ReAct engine by default) +- **Engine selection**: `--engine react` (default) or `--engine plan` (legacy) - **Verbose mode**: `cf work start --execute --verbose` shows detailed progress - **Dry run mode**: `cf work start --execute --dry-run` -- **Self-correction loop**: Agent automatically fixes failing verification gates (up to 3 attempts) +- **Self-correction loop**: Agent automatically fixes failing verification gates (up to 5 attempts with ReAct) - **FAILED task status**: Tasks can transition to FAILED for proper error visibility - **Tech stack configuration**: `cf init . --detect` auto-detects tech stack from project files - **Project preferences**: Agent loads AGENTS.md or CLAUDE.md for per-project configuration @@ -79,9 +80,12 @@ If you are an agent working in this repo: **do not improvise architecture**. Fol ``` codeframe/ ├── core/ # Headless domain + orchestration (NO FastAPI imports) -│ ├── agent.py # Agent orchestrator with blocker detection -│ ├── planner.py # LLM-powered implementation planning -│ ├── executor.py # Code execution engine with rollback +│ ├── react_agent.py # ReAct agent (default engine) - observe-think-act loop +│ ├── tools.py # Tool definitions for ReAct agent (7 tools) +│ ├── editor.py # Search-replace file editor with fuzzy matching +│ ├── agent.py # Legacy plan-based agent (--engine plan) +│ ├── planner.py # LLM-powered implementation planning (plan engine) +│ ├── executor.py # Code execution engine with rollback (plan engine) │ ├── context.py # Task context loader with relevance scoring │ ├── tasks.py # Task management with depends_on field │ ├── blockers.py # Human-in-the-loop blocker system @@ -200,14 +204,17 @@ At all times: | Component | File | Purpose | |-----------|------|---------| +| **ReactAgent** | **`core/react_agent.py`** | **Default engine: observe-think-act loop with tool use** | +| **Tools** | **`core/tools.py`** | **7 agent tools: read/edit/create file, run command/tests, search, list** | +| **Editor** | **`core/editor.py`** | **Search-replace editor with 4-level fuzzy matching** | | LLM Adapter | `adapters/llm/base.py` | Protocol, ModelSelector, Purpose enum | | Anthropic Provider | `adapters/llm/anthropic.py` | Claude integration with streaming | | Mock Provider | `adapters/llm/mock.py` | Testing with call tracking | | Context Loader | `core/context.py` | Codebase scanning, relevance scoring | -| Planner | `core/planner.py` | Task → ImplementationPlan via LLM | -| Executor | `core/executor.py` | File ops, shell commands, rollback | -| Agent | `core/agent.py` | Orchestration loop, blocker detection | -| Runtime | `core/runtime.py` | Run lifecycle, agent invocation | +| Planner | `core/planner.py` | Task → ImplementationPlan via LLM (plan engine) | +| Executor | `core/executor.py` | File ops, shell commands, rollback (plan engine) | +| Agent (legacy) | `core/agent.py` | Plan-based orchestration (--engine plan) | +| Runtime | `core/runtime.py` | Run lifecycle, engine selection, agent invocation | | Conductor | `core/conductor.py` | Batch orchestration, worker pool | | Dependency Graph | `core/dependency_graph.py` | DAG operations, topological sort | | Dependency Analyzer | `core/dependency_analyzer.py` | LLM-based dependency inference | @@ -228,13 +235,50 @@ Task-based heuristic via `Purpose` enum: Future: `cf tasks set provider ` for per-task override. -### Execution Flow -``` +### Engine Selection + +CodeFRAME supports two execution engines, selected via `--engine`: + +| Engine | Flag | Pattern | Best For | +|--------|------|---------|----------| +| **ReAct** (default) | `--engine react` | Observe → Think → Act loop | Most tasks, adaptive execution | +| **Plan** (legacy) | `--engine plan` | Plan all steps → Execute sequentially | Well-defined, predictable tasks | + +### Execution Flow (ReAct — default) +```text cf work start --execute [--verbose] │ ├── runtime.start_task_run() # Creates run, transitions task→IN_PROGRESS │ - └── runtime.execute_agent(verbose=True/False) + └── runtime.execute_agent(engine="react") + │ + └── ReactAgent.run(task_id) + ├── Load context (PRD, codebase, blockers, AGENTS.md, tech_stack) + ├── Build layered system prompt + │ + └── Tool-use loop (until complete/blocked/failed): + ├── LLM decides next action (tool call) + ├── Execute tool: read_file, edit_file, create_file, + │ run_command, run_tests, search_codebase, list_files + ├── Observe result → feed back to LLM + ├── Incremental verification (ruff after file changes) + └── Token budget management (3-tier compaction) + │ + └── Final verification with self-correction (up to 5 retries) + │ + └── Update run/task status based on agent result + ├── COMPLETED → complete_run() → task→DONE + ├── BLOCKED → block_run() → task→BLOCKED + └── FAILED → fail_run() → task→FAILED +``` + +### Execution Flow (Plan — legacy, `--engine plan`) +```text +cf work start --execute --engine plan + │ + ├── runtime.start_task_run() + │ + └── runtime.execute_agent(engine="plan") │ ├── agent.run(task_id) │ ├── Load context (PRD, codebase, blockers, AGENTS.md) @@ -289,7 +333,8 @@ cf tasks show # Work execution (single task) cf work start # Creates run record -cf work start --execute # Runs AI agent +cf work start --execute # Runs AI agent (ReAct engine, default) +cf work start --execute --engine plan # Use legacy plan engine cf work start --execute --verbose # With detailed output cf work start --execute --dry-run # Preview changes cf work stop # Cancel stale run @@ -298,13 +343,14 @@ cf work follow # Stream real-time output cf work follow --tail 50 # Show last 50 lines then stream # Batch execution (multiple tasks) -cf work batch run ... # Execute multiple tasks +cf work batch run ... # Execute multiple tasks (ReAct default) cf work batch run --all-ready # All READY tasks +cf work batch run --all-ready --engine plan # Use legacy plan engine cf work batch run --strategy serial # Serial (default) cf work batch run --strategy parallel # Parallel execution cf work batch run --strategy auto # LLM-inferred dependencies cf work batch run --max-parallel 4 # Concurrent limit -cf work batch run --retry 3 # Auto-retry failures +cf work batch run --retry 3 # Auto-retry failures cf work batch status [batch_id] # Show batch status cf work batch cancel # Cancel running batch cf work batch resume # Re-run failed tasks @@ -360,6 +406,11 @@ Do not expand frontend scope during Golden Path work. - `docs/AGENT_IMPLEMENTATION_TASKS.md` - Agent system components - `docs/V2_STRATEGIC_ROADMAP.md` - 5-phase plan from CLI to multi-agent +### Agent Architecture (Phase 2.5) +- `docs/AGENT_V3_UNIFIED_PLAN.md` - ReAct architecture design and rules +- `docs/REACT_AGENT_ARCHITECTURE.md` - Deep-dive: tools, editor, token management +- `docs/REACT_AGENT_ANALYSIS.md` - Golden path test run analysis + ### API Documentation (Phase 2) - `/docs` - Swagger UI (interactive API explorer) - `/redoc` - ReDoc (readable API documentation) @@ -406,19 +457,37 @@ If you are unsure which direction to take, default to: --- -## Recent Updates (2026-02-03) +## Recent Updates (2026-02-15) -### Phase 2 Complete: Server Layer -All Phase 2 deliverables are complete: +### Phase 2.5 Complete: ReAct Agent Architecture (#355) +Default execution engine switched from plan-based to **ReAct (Reasoning + Acting)**. + +**What changed:** +- Default engine is now `"react"` — all `cf work start --execute` and `cf work batch run` commands use ReactAgent +- Legacy plan engine available via `--engine plan` flag +- ReactAgent uses iterative tool-use loop (observe → think → act) instead of plan-all-then-execute +- 7 structured tools: `read_file`, `edit_file`, `create_file`, `run_command`, `run_tests`, `search_codebase`, `list_files` +- Search-replace editing with 4-level fuzzy matching (exact → whitespace-normalized → indentation-agnostic → fuzzy) +- Token budget management with 3-tier compaction +- Adaptive iteration budget based on task complexity + +**Phase 2.5 deliverables:** +- ✅ ReAct agent implementation (`core/react_agent.py`, `core/tools.py`, `core/editor.py`) +- ✅ CLI `--engine` flag (#353) +- ✅ API engine parameter (#354) +- ✅ Default switch to react + documentation (#355) | Phase | Focus | Status | |-------|-------|--------| | 1 | CLI Completion | ✅ **Complete** | | 2 | Server Layer | ✅ **Complete** | +| 2.5 | ReAct Agent | ✅ **Complete** | | 3 | Web UI Rebuild | Planned | | 4 | Multi-Agent Coordination | Planned | | 5 | Advanced Features | Planned | +### Phase 2 Complete: Server Layer (2026-02-03) + **Phase 2 deliverables completed:** - ✅ Server audit and refactor (#322) - 15 v2 routers following thin adapter pattern - ✅ API key authentication (#326) - Scopes: read/write/admin diff --git a/codeframe/cli/app.py b/codeframe/cli/app.py index 5bc10ade..85c8404c 100644 --- a/codeframe/cli/app.py +++ b/codeframe/cli/app.py @@ -1994,9 +1994,9 @@ def work_start( help="Run stub agent (for testing, does nothing real)", ), engine: str = typer.Option( - "plan", + "react", "--engine", - help="Agent engine: 'plan' (default, step-based) or 'react' (ReAct tool-use loop)", + help="Agent engine: 'react' (default, ReAct tool-use loop) or 'plan' (legacy step-based)", ), ) -> None: """Start working on a task. @@ -2007,7 +2007,7 @@ def work_start( Example: codeframe work start abc123 codeframe work start abc123 --execute - codeframe work start abc123 --execute --engine react + codeframe work start abc123 --execute --engine plan codeframe work start abc123 --execute --dry-run codeframe work start abc123 --execute --verbose """ @@ -2056,7 +2056,7 @@ def work_start( mode = "[dim](dry run)[/dim]" if dry_run else "" debug_mode = " [dim](debug logging enabled)[/dim]" if debug else "" verbose_mode = " [dim](verbose)[/dim]" if verbose else "" - engine_mode = f" [dim](engine={engine})[/dim]" if engine != "plan" else "" + engine_mode = f" [dim](engine={engine})[/dim]" if engine != "react" else "" console.print(f"\n[bold]Executing agent...{mode}{debug_mode}{verbose_mode}{engine_mode}[/bold]") try: @@ -2860,9 +2860,9 @@ def batch_run( help="Run verification gates (pytest, ruff) after successful batch completion", ), engine: str = typer.Option( - "plan", + "react", "--engine", - help="Agent engine: 'plan' (default, step-based) or 'react' (ReAct tool-use loop)", + help="Agent engine: 'react' (default, ReAct tool-use loop) or 'plan' (legacy step-based)", ), ) -> None: """Execute multiple tasks in batch. @@ -2876,7 +2876,7 @@ def batch_run( codeframe work batch run task1 task2 task3 codeframe work batch run --all-ready codeframe work batch run --all-ready --strategy serial - codeframe work batch run --all-ready --engine react + codeframe work batch run --all-ready --engine plan codeframe work batch run task1 task2 --dry-run codeframe work batch run task1 task2 --retry 2 """ diff --git a/codeframe/core/conductor.py b/codeframe/core/conductor.py index b6e1ff5a..6af50631 100644 --- a/codeframe/core/conductor.py +++ b/codeframe/core/conductor.py @@ -456,7 +456,7 @@ class BatchRun: started_at: datetime completed_at: Optional[datetime] results: dict[str, str] = field(default_factory=dict) - engine: str = "plan" + engine: str = "react" def start_batch( @@ -468,7 +468,7 @@ def start_batch( dry_run: bool = False, max_retries: int = 0, on_event: Optional[Callable[[str, dict], None]] = None, - engine: str = "plan", + engine: str = "react", ) -> BatchRun: """Start a batch execution of multiple tasks. @@ -481,7 +481,7 @@ def start_batch( dry_run: If True, don't actually execute tasks max_retries: Max retry attempts for failed tasks (0 = no retries) on_event: Optional callback for batch events - engine: Agent engine to use ("plan" or "react") + engine: Agent engine to use ("react" default, or "plan" for legacy) Returns: BatchRun with results populated @@ -1699,7 +1699,7 @@ def _execute_task_subprocess( workspace: Workspace, task_id: str, batch_id: Optional[str] = None, - engine: str = "plan", + engine: str = "react", ) -> str: """Execute a single task via subprocess. diff --git a/codeframe/core/runtime.py b/codeframe/core/runtime.py index 17aa2848..4abb301d 100644 --- a/codeframe/core/runtime.py +++ b/codeframe/core/runtime.py @@ -596,7 +596,7 @@ def execute_agent( verbose: bool = False, fix_coordinator: Optional["GlobalFixCoordinator"] = None, event_publisher: Optional["EventPublisher"] = None, - engine: str = "plan", + engine: str = "react", ) -> "AgentState": """Execute a task using the agent orchestrator. @@ -611,7 +611,7 @@ def execute_agent( verbose: If True, print detailed progress to stdout fix_coordinator: Optional coordinator for global fixes (for parallel execution) event_publisher: Optional EventPublisher for SSE streaming (real-time events) - engine: Agent engine to use ("plan" for existing Agent, "react" for ReactAgent) + engine: Agent engine to use ("react" for ReactAgent (default), "plan" for legacy Agent) Returns: Final AgentState after execution diff --git a/codeframe/ui/routers/tasks_v2.py b/codeframe/ui/routers/tasks_v2.py index 875edbba..da20f243 100644 --- a/codeframe/ui/routers/tasks_v2.py +++ b/codeframe/ui/routers/tasks_v2.py @@ -52,8 +52,8 @@ class ApproveTasksRequest(BaseModel): description="Whether to start batch execution after approval", ) engine: str = Field( - "plan", - description="Execution engine: 'plan' (default) or 'react' (ReAct loop)", + "react", + description="Execution engine: 'react' (default, ReAct loop) or 'plan' (legacy step-based)", ) @model_validator(mode="after") @@ -109,8 +109,8 @@ class StartExecutionRequest(BaseModel): description="Number of retries for failed tasks", ) engine: str = Field( - "plan", - description="Execution engine: 'plan' (default) or 'react' (ReAct loop)", + "react", + description="Execution engine: 'react' (default, ReAct loop) or 'plan' (legacy step-based)", ) @model_validator(mode="after") @@ -586,7 +586,7 @@ async def start_single_task( execute: bool = Query(False, description="Run agent execution (requires ANTHROPIC_API_KEY)"), dry_run: bool = Query(False, description="Preview changes without making them"), verbose: bool = Query(False, description="Show detailed progress output"), - engine: Literal["plan", "react"] = Query("plan", description="Execution engine: 'plan' (default) or 'react' (ReAct loop)"), + engine: Literal["plan", "react"] = Query("react", description="Execution engine: 'react' (default, ReAct loop) or 'plan' (legacy step-based)"), workspace: Workspace = Depends(get_v2_workspace), ) -> dict[str, Any]: """Start a single task run. diff --git a/docs/AGENT_V3_UNIFIED_PLAN.md b/docs/AGENT_V3_UNIFIED_PLAN.md index b4167684..065ae4cf 100644 --- a/docs/AGENT_V3_UNIFIED_PLAN.md +++ b/docs/AGENT_V3_UNIFIED_PLAN.md @@ -1,7 +1,7 @@ # Agent V3: Unified Architectural Plan **Date**: 2026-02-07 -**Status**: Final Draft — Synthesized from research team debate +**Status**: ✅ Implemented — Default engine since 2026-02-15 (#355) **Sources**: AGENT_ARCHITECTURE_RESEARCH.md, AGENT_FRAMEWORK_DEEP_DIVE.md, AGENT_ARCHITECTURE_CRITIQUE.md, REACT_AGENT_ARCHITECTURE.md --- @@ -27,7 +27,7 @@ This plan redesigns CodeFRAME's agent execution from Plan-and-Execute to a **Hyb 3. **Lint after every file change** — catch errors immediately, not after 92 accumulate 4. **Model is the planner** — the LLM decides what to do next based on observed reality 5. **Fewer tools = higher accuracy** — 7 focused tools, not a large surface area -6. **Backward compatible** — `--engine plan` preserved as default until ReAct is validated +6. **Backward compatible** — `--engine plan` available as fallback (ReAct is now default) --- @@ -39,7 +39,7 @@ This plan redesigns CodeFRAME's agent execution from Plan-and-Execute to a **Hyb cf work start --execute [--engine react] │ ├── runtime.start_task_run() - │ └── Select engine: "plan" (default, existing) or "react" (new) + │ └── Select engine: "react" (default) or "plan" (legacy) │ └── runtime.execute_agent(engine="react") │ diff --git a/docs/V2_STRATEGIC_ROADMAP.md b/docs/V2_STRATEGIC_ROADMAP.md index 303f4254..863533af 100644 --- a/docs/V2_STRATEGIC_ROADMAP.md +++ b/docs/V2_STRATEGIC_ROADMAP.md @@ -1,8 +1,8 @@ # CodeFRAME v2 Strategic Roadmap **Created**: 2026-01-29 -**Updated**: 2026-02-03 -**Status**: Active - Phase 2 In Progress +**Updated**: 2026-02-15 +**Status**: Active - Phase 2.5 Complete, Phase 3 Next ## Executive Summary @@ -79,7 +79,7 @@ CodeFRAME v2 CLI **Phase 1 is complete** with a production-ready foundation. The ## Phase 2: Server Layer as Thin Adapter **Goal**: FastAPI server exposing core functionality via REST + real-time events. -**Status**: 🔄 **90% COMPLETE** - Core routes done, finishing real-time events + docs +**Status**: ✅ **COMPLETE** ### Deliverables @@ -164,6 +164,59 @@ See `docs/PHASE_2_DEVELOPER_GUIDE.md` for implementation guide. --- +## Phase 2.5: ReAct Agent Architecture ✅ COMPLETE + +**Goal**: Replace plan-then-execute agent with iterative ReAct (Reasoning + Acting) loop as the default engine. +**Status**: ✅ **COMPLETE** (2026-02-15) + +### Motivation + +The plan-based agent had several failure modes discovered during testing: +- Config file overwrites (whole-file generation ignores existing content) +- Cross-file naming inconsistency (each file generated in isolation) +- Accumulated lint errors (no incremental verification) +- Ineffective self-correction (empty error context) + +### Deliverables + +1. **ReAct Agent Implementation** - ✅ COMPLETE + - `core/react_agent.py` - Observe-Think-Act loop with tool use + - `core/tools.py` - 7 structured tools (read/edit/create file, run command/tests, search, list) + - `core/editor.py` - Search-replace editor with 4-level fuzzy matching + +2. **Engine Selection** - ✅ COMPLETE + - `--engine react` (default) or `--engine plan` (legacy) on all work commands + - Runtime routes to ReactAgent or Agent based on engine parameter + - API endpoints support engine parameter with validation + +3. **CLI Validation** (#353) - ✅ COMPLETE + - `--engine` flag on `cf work start` and `cf work batch run` + - Default switched to "react" + +4. **API Validation** (#354) - ✅ COMPLETE + - Engine parameter on execute, approve, and stream endpoints + - Backward compatible — omitting engine uses "react" default + +5. **Default Switch + Documentation** (#355) - ✅ COMPLETE + - Default engine changed from "plan" to "react" across CLI, API, and runtime + - CLAUDE.md updated with ReAct architecture documentation + +### Key Architecture Decisions + +- **Search-replace editing**: ~98% accuracy vs ~70-80% for whole-file regeneration +- **Read before write**: Agent always sees actual file state before editing +- **Lint after every change**: Catch errors immediately, not after they accumulate +- **7 focused tools**: Fewer tools = higher accuracy +- **Token budget management**: 3-tier compaction prevents context window overflow +- **Adaptive iteration budget**: Task complexity scoring adjusts iteration limits + +### Reference Documentation +- `docs/AGENT_V3_UNIFIED_PLAN.md` - Architecture design and rules +- `docs/REACT_AGENT_ARCHITECTURE.md` - Deep-dive on tools, editor, token management +- `docs/PHASE_25_VALIDATION_REPORT.md` - End-to-end validation results + +--- + ## Phase 3: Web UI Rebuild **Goal**: Modern dashboard consuming REST/WebSocket API. @@ -317,9 +370,10 @@ After each phase: | Phase | Focus | Key Outcome | Status | |-------|-------|-------------|--------| | 1 | CLI Completion | Production-ready headless agent | ✅ **COMPLETE** | -| 2 | Server Layer | REST API + real-time events | 🔄 **90% COMPLETE** | +| 2 | Server Layer | REST API + real-time events | ✅ **COMPLETE** | +| 2.5 | ReAct Agent | Iterative tool-use execution engine | ✅ **COMPLETE** | | 3 | Web UI | Modern dashboard | Planned | | 4 | Multi-Agent | Agent swarms | Planned | | 5 | Advanced | Power features | Planned | -**Current focus**: Phase 2 - Completing remaining items (WebSocket events, OpenAPI docs, pagination). +**Current focus**: Phase 3 - Web UI rebuild on v2 foundation. diff --git a/tests/cli/test_v2_cli_integration.py b/tests/cli/test_v2_cli_integration.py index b2cce076..ef827643 100644 --- a/tests/cli/test_v2_cli_integration.py +++ b/tests/cli/test_v2_cli_integration.py @@ -873,6 +873,7 @@ def test_execute_dry_run(self, workspace_with_ready_tasks, mock_llm): [ "work", "start", tid, "--execute", "--dry-run", + "--engine", "plan", "-w", str(workspace_with_ready_tasks), ], ) @@ -882,7 +883,7 @@ def test_execute_dry_run(self, workspace_with_ready_tasks, mock_llm): assert provider.call_count >= 1 def test_execute_creates_file(self, workspace_with_ready_tasks, mock_llm): - """work start --execute runs agent that creates a file via MockProvider.""" + """work start --execute --engine plan runs agent that creates a file via MockProvider.""" # Plan says create hello.py, executor generates content via LLM provider = mock_llm([MOCK_PLAN_RESPONSE, MOCK_FILE_CONTENT]) @@ -896,6 +897,7 @@ def test_execute_creates_file(self, workspace_with_ready_tasks, mock_llm): [ "work", "start", tid, "--execute", + "--engine", "plan", "-w", str(workspace_with_ready_tasks), ], ) @@ -949,7 +951,7 @@ def test_ai_golden_path(self, temp_repo, mock_llm): assert len(task_list) > 0 tid = task_list[0].id[:8] - r = runner.invoke(app, ["work", "start", tid, "--execute", "-w", wp]) + r = runner.invoke(app, ["work", "start", tid, "--execute", "--engine", "plan", "-w", wp]) assert r.exit_code == 0, f"work start --execute: {r.output}" # Verify LLM was exercised through the full path @@ -993,7 +995,8 @@ def test_react_verbose_mode(self, workspace_with_ready_tasks, mock_llm): ], ) assert result.exit_code == 0, f"react verbose failed: {result.output}" - assert "engine=react" in result.output + # engine=react is the default, so the label should be hidden + assert "engine=react" not in result.output assert "[ReactAgent]" in result.output assert provider.call_count >= 1 diff --git a/tests/core/test_agent_streaming.py b/tests/core/test_agent_streaming.py index e468fcb7..2d82b75d 100644 --- a/tests/core/test_agent_streaming.py +++ b/tests/core/test_agent_streaming.py @@ -151,21 +151,19 @@ def test_execute_agent_creates_output_logger(self, temp_workspace: Workspace): """Runtime execute_agent should create an output logger for the run.""" from codeframe.core import runtime, tasks as tasks_module from codeframe.core.streaming import run_output_exists + from codeframe.core.agent import AgentStatus # Create task and run task = tasks_module.create(temp_workspace, title="Test task") run = runtime.start_task_run(temp_workspace, task.id) - # Mock the Agent class at its definition location - with patch("codeframe.core.agent.Agent") as MockAgent, \ + # Mock the ReactAgent class (default engine is now "react") + with patch("codeframe.core.react_agent.ReactAgent") as MockReact, \ patch("codeframe.adapters.llm.get_provider"): mock_agent = MagicMock() - mock_agent.run.return_value = MagicMock( - status=MagicMock(value="completed"), - blocker=None, - ) - MockAgent.return_value = mock_agent + mock_agent.run.return_value = AgentStatus.COMPLETED + MockReact.return_value = mock_agent # Patch os.getenv to provide API key with patch.dict("os.environ", {"ANTHROPIC_API_KEY": "test-key"}): @@ -178,9 +176,10 @@ def test_execute_agent_creates_output_logger(self, temp_workspace: Workspace): assert run_output_exists(temp_workspace, run.id) def test_output_logger_passed_to_agent(self, temp_workspace: Workspace): - """Runtime should pass the output logger to the Agent.""" + """Runtime should pass the output logger to the ReactAgent (default engine).""" from codeframe.core import runtime, tasks as tasks_module from codeframe.core.streaming import RunOutputLogger + from codeframe.core.agent import AgentStatus task = tasks_module.create(temp_workspace, title="Test task") run = runtime.start_task_run(temp_workspace, task.id) @@ -191,13 +190,10 @@ def capture_agent(*args, **kwargs): nonlocal captured_logger captured_logger = kwargs.get("output_logger") mock = MagicMock() - mock.run.return_value = MagicMock( - status=MagicMock(value="completed"), - blocker=None, - ) + mock.run.return_value = AgentStatus.COMPLETED return mock - with patch("codeframe.core.agent.Agent", side_effect=capture_agent), \ + with patch("codeframe.core.react_agent.ReactAgent", side_effect=capture_agent), \ patch("codeframe.adapters.llm.get_provider"), \ patch.dict("os.environ", {"ANTHROPIC_API_KEY": "test-key"}): try: diff --git a/tests/core/test_react_engine_integration.py b/tests/core/test_react_engine_integration.py index 4a706119..7e59e323 100644 --- a/tests/core/test_react_engine_integration.py +++ b/tests/core/test_react_engine_integration.py @@ -65,24 +65,24 @@ class TestRuntimeEngineSelection: @patch.dict("os.environ", {"ANTHROPIC_API_KEY": "test-key"}) @patch("codeframe.core.streaming.RunOutputLogger") @patch("codeframe.adapters.llm.get_provider") - @patch("codeframe.core.agent.Agent") - def test_default_engine_uses_plan_agent( - self, mock_agent_cls, mock_get_provider, mock_output_logger, temp_workspace + @patch("codeframe.core.react_agent.ReactAgent") + def test_default_engine_uses_react_agent( + self, mock_react_cls, mock_get_provider, mock_output_logger, temp_workspace ): - """Default engine ('plan') should use the existing Agent class.""" + """Default engine ('react') should use the ReactAgent class.""" from codeframe.core.runtime import execute_agent, start_task_run task = tasks.create(temp_workspace, title="Test", status=TaskStatus.READY) run = start_task_run(temp_workspace, task.id) - # Mock agent + # Mock agent — ReactAgent.run() returns AgentStatus, not AgentState mock_agent = MagicMock() - mock_agent.run.return_value = AgentState(status=AgentStatus.COMPLETED) - mock_agent_cls.return_value = mock_agent + mock_agent.run.return_value = AgentStatus.COMPLETED + mock_react_cls.return_value = mock_agent state = execute_agent(temp_workspace, run) - mock_agent_cls.assert_called_once() + mock_react_cls.assert_called_once() assert state.status == AgentStatus.COMPLETED @patch.dict("os.environ", {"ANTHROPIC_API_KEY": "test-key"}) @@ -223,7 +223,7 @@ class TestBatchRunEngineField: """Tests for engine field on BatchRun dataclass.""" def test_batch_run_default_engine(self): - """BatchRun should default to engine='plan'.""" + """BatchRun should default to engine='react'.""" batch = BatchRun( id="test-batch", workspace_id="ws-1", @@ -235,7 +235,7 @@ def test_batch_run_default_engine(self): started_at=datetime.now(timezone.utc), completed_at=None, ) - assert batch.engine == "plan" + assert batch.engine == "react" def test_batch_run_react_engine(self): """BatchRun should accept engine='react'.""" @@ -281,7 +281,7 @@ def test_save_and_load_batch_with_engine(self, temp_workspace): assert loaded.engine == "react" def test_save_and_load_batch_default_engine(self, temp_workspace): - """Default engine ('plan') should persist correctly.""" + """Default engine ('react') should persist correctly.""" from codeframe.core.conductor import get_batch batch = BatchRun( @@ -300,7 +300,7 @@ def test_save_and_load_batch_default_engine(self, temp_workspace): loaded = get_batch(temp_workspace, "test-engine-default") assert loaded is not None - assert loaded.engine == "plan" + assert loaded.engine == "react" class TestSubprocessCommandConstruction: @@ -330,8 +330,8 @@ def test_subprocess_includes_engine_flag(self, mock_popen, temp_workspace): assert "react" in cmd @patch("codeframe.core.conductor.subprocess.Popen") - def test_subprocess_default_engine_is_plan(self, mock_popen, temp_workspace): - """Default engine should be 'plan' in subprocess command.""" + def test_subprocess_default_engine_is_react(self, mock_popen, temp_workspace): + """Default engine should be 'react' in subprocess command.""" from codeframe.core.conductor import _execute_task_subprocess from codeframe.core.runtime import RunStatus @@ -348,7 +348,7 @@ def test_subprocess_default_engine_is_plan(self, mock_popen, temp_workspace): cmd = mock_popen.call_args[0][0] assert "--engine" in cmd - assert "plan" in cmd + assert "react" in cmd class TestStartBatchEngineParam: @@ -380,7 +380,7 @@ def test_start_batch_passes_engine_to_subprocess( @patch("codeframe.core.conductor._execute_task_subprocess") def test_start_batch_default_engine(self, mock_subprocess, workspace_with_tasks): - """start_batch without engine param should default to 'plan'.""" + """start_batch without engine param should default to 'react'.""" workspace, task_list = workspace_with_tasks task_ids = [t.id for t in task_list] @@ -392,7 +392,7 @@ def test_start_batch_default_engine(self, mock_subprocess, workspace_with_tasks) strategy="serial", ) - assert batch.engine == "plan" + assert batch.engine == "react" class TestBackwardCompatibility: @@ -401,28 +401,29 @@ class TestBackwardCompatibility: @patch.dict("os.environ", {"ANTHROPIC_API_KEY": "test-key"}) @patch("codeframe.core.streaming.RunOutputLogger") @patch("codeframe.adapters.llm.get_provider") - @patch("codeframe.core.agent.Agent") + @patch("codeframe.core.react_agent.ReactAgent") def test_execute_agent_without_engine_param( - self, mock_agent_cls, mock_get_provider, mock_output_logger, temp_workspace + self, mock_react_cls, mock_get_provider, mock_output_logger, temp_workspace ): - """Calling execute_agent without engine should work (backward compatible).""" + """Calling execute_agent without engine should use ReactAgent (default).""" from codeframe.core.runtime import execute_agent, start_task_run task = tasks.create(temp_workspace, title="Test", status=TaskStatus.READY) run = start_task_run(temp_workspace, task.id) + # ReactAgent.run() returns AgentStatus, not AgentState mock_agent = MagicMock() - mock_agent.run.return_value = AgentState(status=AgentStatus.COMPLETED) - mock_agent_cls.return_value = mock_agent + mock_agent.run.return_value = AgentStatus.COMPLETED + mock_react_cls.return_value = mock_agent - # Call without engine parameter - should still work + # Call without engine parameter - should use ReactAgent (default) state = execute_agent(temp_workspace, run) assert state.status == AgentStatus.COMPLETED - mock_agent_cls.assert_called_once() + mock_react_cls.assert_called_once() def test_batch_run_without_engine_field(self, temp_workspace): - """BatchRun created without engine should default to 'plan'.""" + """BatchRun created without engine should default to 'react'.""" batch = BatchRun( id="compat-test", workspace_id=temp_workspace.id, @@ -434,4 +435,4 @@ def test_batch_run_without_engine_field(self, temp_workspace): started_at=datetime.now(timezone.utc), completed_at=None, ) - assert batch.engine == "plan" + assert batch.engine == "react" diff --git a/tests/integration/test_tasks_v2_engine.py b/tests/integration/test_tasks_v2_engine.py index 01389702..deaac5f7 100644 --- a/tests/integration/test_tasks_v2_engine.py +++ b/tests/integration/test_tasks_v2_engine.py @@ -52,7 +52,7 @@ def _make_task(workspace, title="Test task", status=TaskStatus.READY): return task -def _make_batch_run(workspace_id, task_ids, engine="plan"): +def _make_batch_run(workspace_id, task_ids, engine="react"): """Build a BatchRun stub for mock return values.""" return BatchRun( id=str(uuid.uuid4()), @@ -108,7 +108,7 @@ class TestExecuteEndpointEngine: """Tests for POST /api/v2/tasks/execute engine parameter.""" def test_execute_default_engine(self, tmp_path, client): - """Default engine should be 'plan' when not specified.""" + """Default engine should be 'react' when not specified.""" ws = _make_workspace(tmp_path) task = _make_task(ws) @@ -128,7 +128,7 @@ def test_execute_default_engine(self, tmp_path, client): assert data["success"] is True mock_batch.assert_called_once() _, kwargs = mock_batch.call_args - assert kwargs["engine"] == "plan" + assert kwargs["engine"] == "react" def test_execute_with_react_engine(self, tmp_path, client): """Passing engine='react' should forward it to conductor.""" @@ -195,7 +195,7 @@ class TestStartSingleTaskEngine: """Tests for POST /api/v2/tasks/{task_id}/start engine parameter.""" def test_start_single_default_engine(self, tmp_path, client): - """Default engine should be 'plan' when query param not provided.""" + """Default engine should be 'react' when query param not provided.""" ws = _make_workspace(tmp_path) task = _make_task(ws) run = _make_run(ws.id, task.id)