diff --git a/src/millstone/prompts/tasklist_prompt.md b/src/millstone/prompts/tasklist_prompt.md index 754c515..c46a2b0 100644 --- a/src/millstone/prompts/tasklist_prompt.md +++ b/src/millstone/prompts/tasklist_prompt.md @@ -20,6 +20,7 @@ You are an autonomous developer working in: {{WORKING_DIRECTORY}} - Allowed edits: - Code/tests needed for the chosen task. - The tasklist only to (a) check off the chosen task and (b) text-only coherence updates to future tasks if strictly necessary. +- **DO NOT COMMIT**: Do not run `git commit` or `git push`. Leave changes staged or unstaged — the orchestrator commits after review. - **FORBIDDEN**: - Completing, partially completing, or reordering any other tasks. - Implementing future tasks. diff --git a/src/millstone/runtime/orchestrator.py b/src/millstone/runtime/orchestrator.py index 2d9a0f8..66ee3c2 100755 --- a/src/millstone/runtime/orchestrator.py +++ b/src/millstone/runtime/orchestrator.py @@ -79,6 +79,7 @@ class BuilderArtifact: output: str git_status: str git_diff: str + builder_committed: bool = False @dataclass @@ -680,7 +681,14 @@ def __init__( else: if self.roadmap: print(f"Roadmap: {self.roadmap}") - print(f"Tasklist: {self.tasklist}") + from millstone.artifact_providers.mcp import MCPTasklistProvider + + tl_provider = self._outer_loop_manager.tasklist_provider + if isinstance(tl_provider, MCPTasklistProvider): + label_str = ", ".join(tl_provider._labels) if tl_provider._labels else "none" + print(f"Tasklist: {tl_provider._mcp_server} (labels: {label_str})") + else: + print(f"Tasklist: {self.tasklist}") print(f"Max tasks: {self.max_tasks}") if self.compact_threshold > 0: print(f"Compact threshold: {self.compact_threshold}") @@ -1682,6 +1690,71 @@ def delegate_commit(self) -> bool: self.last_commit_failure = failure_info return success + def _auto_commit_tasklist_if_needed(self) -> bool: + """Auto-commit the tasklist checkbox if the builder left it unstaged. + + When the builder commits code early but forgets to include the tasklist + tick, this ensures the task is still marked complete so it won't be + re-selected on the next run. Only applies to file-backed tasklists. + + Returns True if no action was needed or the commit succeeded. + Returns False if the commit failed (task not fully completed). + """ + import subprocess + + from millstone.artifact_providers.mcp import MCPTasklistProvider + + provider = self._outer_loop_manager.tasklist_provider + if isinstance(provider, MCPTasklistProvider): + return True # MCP tasks are marked done via API, not file commits + + status = self.git("status", "--porcelain").strip() + if not status: + return True + + remaining_files = [line.split()[-1] for line in status.split("\n") if line.strip()] + tasklist_path = str(self.tasklist) + + if len(remaining_files) == 1 and remaining_files[0] == tasklist_path: + self.log( + "auto_commit_tasklist", + reason="builder_early_commit_forgot_tasklist", + file=tasklist_path, + ) + add_result = subprocess.run( + ["git", "add", tasklist_path], + cwd=self.repo_dir, + capture_output=True, + ) + if add_result.returncode != 0: + progress( + f"{self._task_prefix()} ERROR: git add for tasklist failed " + f"(rc={add_result.returncode})" + ) + return False + commit_result = subprocess.run( + [ + "git", + "commit", + "-m", + "Mark task complete in tasklist\n\nGenerated with millstone orchestrator", + ], + cwd=self.repo_dir, + capture_output=True, + ) + if commit_result.returncode != 0: + progress( + f"{self._task_prefix()} ERROR: git commit for tasklist failed " + f"(rc={commit_result.returncode})" + ) + return False + progress( + f"{self._task_prefix()} Auto-committed tasklist tick " + "(builder committed code but forgot to stage tasklist)" + ) + + return True + def load_prompt(self, name: str) -> str: """Load a prompt file from custom dir or package resources.""" if self._custom_prompts_dir: @@ -1983,13 +2056,37 @@ def analyze_tasklist(self) -> dict: label_str = ", ".join(provider._labels) if provider._labels else "none" print(f"Remote tasklist provider: {provider._mcp_server}") print(f" Labels: {label_str}") + + # Fetch live task counts from the remote provider + pending_count = 0 + completed_count = 0 + total_count = 0 + try: + if provider._agent_callback is None: + provider.set_agent_callback( + lambda p, **k: self.run_agent(p, role="author", **k) + ) + provider.invalidate_cache() + tasks = provider.list_tasks() + from millstone.artifacts.models import TaskStatus + + for t in tasks: + if t.status == TaskStatus.done: + completed_count += 1 + else: + pending_count += 1 + total_count = len(tasks) + print(f" Open tasks: {pending_count}") + except Exception: + print(" Open tasks: unable to fetch task count") + print() - print("Task analysis is not available for remote providers.") + print("Detailed task analysis is not available for remote providers.") print("Use --dry-run to inspect prompts that would be sent.") return { - "pending_count": 0, - "completed_count": 0, - "total_count": 0, + "pending_count": pending_count, + "completed_count": completed_count, + "total_count": total_count, "tasks": [], "dependencies": [], "suggested_order": [], @@ -2861,6 +2958,7 @@ def run_single_task(self) -> bool: self._task_previous_diff = None # Determine task text and metadata + _mcp_task_item: Any = None # Set when MCP provider supplies the current task if self.task: task_display = self.task[:50] + "..." if len(self.task) > 50 else self.task self.current_task_title = task_display @@ -2870,23 +2968,52 @@ def run_single_task(self) -> bool: self.apply_risk_settings(task_metadata.get("risk")) self.current_task_group = None else: - self.current_task_title = self.extract_current_task_title() or "task" - task_text = self.current_task_title - self.apply_risk_settings(self.extract_current_task_risk()) - self.current_task_group = self.extract_current_task_group() + # When the tasklist is MCP-backed, derive task title and ID from the + # remote provider's cached task list instead of reading a local file + # that may not exist or may be stale. + from millstone.artifact_providers.mcp import MCPTasklistProvider + from millstone.artifacts.models import TaskStatus + + provider = self._outer_loop_manager.tasklist_provider + if isinstance(provider, MCPTasklistProvider): + cached = provider.list_tasks() + pending = [ + t for t in cached if t.status in (TaskStatus.todo, TaskStatus.in_progress) + ] + if pending: + _mcp_task_item = pending[0] + self.current_task_title = _mcp_task_item.title or "task" + task_text = self.current_task_title + self.apply_risk_settings(_mcp_task_item.risk) + self.current_task_group = None + else: + self.current_task_title = "task" + task_text = self.current_task_title + self.apply_risk_settings(None) + self.current_task_group = None + else: + self.current_task_title = self.extract_current_task_title() or "task" + task_text = self.current_task_title + self.apply_risk_settings(self.extract_current_task_risk()) + self.current_task_group = self.extract_current_task_group() self._current_task_text = task_text # Prefer canonical task_id from metadata (stable, ontology-compliant identity). - # In tasklist mode, use the raw block (title + indented metadata) so that - # "- ID: my-task" lines are visible to the parser. - # In --task mode, task_text already contains the full metadata block. - if self.task: + # When an MCP provider supplied the task, use its remote ID directly. + if _mcp_task_item is not None: + self._current_task_id = _mcp_task_item.task_id + elif self.task: _task_meta = self._tasklist_manager._parse_task_metadata(task_text) + self._current_task_id = _task_meta.get("task_id") or ( + re.sub(r"[^a-z0-9]+", "-", task_text.lower()).strip("-")[:30] or None + ) else: + # In tasklist mode, use the raw block (title + indented metadata) so that + # "- ID: my-task" lines are visible to the parser. _task_meta = self._tasklist_manager.extract_current_task_metadata() - self._current_task_id = _task_meta.get("task_id") or ( - re.sub(r"[^a-z0-9]+", "-", task_text.lower()).strip("-")[:30] or None - ) + self._current_task_id = _task_meta.get("task_id") or ( + re.sub(r"[^a-z0-9]+", "-", task_text.lower()).strip("-")[:30] or None + ) # Worker mode: when --shared-state-dir is set, emit result.json + heartbeats # for the worktree control plane to consume. @@ -3001,6 +3128,15 @@ def _worker_finish( self.log("research_completed", task=task_text[:200], output_file=str(output_file)) if not self.task: self.mark_task_complete() + # Close remote task for MCP providers — mark_task_complete() + # only updates the local tasklist file, which is a no-op when + # the task lives on a remote backend (GitHub Issues, Linear, …). + from millstone.artifact_providers.mcp import MCPTasklistProvider + from millstone.artifacts.models import TaskStatus + + provider = self._outer_loop_manager.tasklist_provider + if isinstance(provider, MCPTasklistProvider) and self._current_task_id: + provider.update_task_status(self._current_task_id, TaskStatus.done) progress(f"{self._task_prefix()} Research completed -> {output_file}") _worker_finish("success", review_summary=builder_output[:4000]) return True @@ -3012,6 +3148,9 @@ def _worker_finish( # Track empty-diff retry state empty_diff_retried = False + # Record HEAD before builder runs so we can detect early commits + pre_build_head = self.git("rev-parse", "HEAD").strip() + # Define Loop components def builder_producer(feedback: str | None = None) -> BuilderArtifact: if feedback: @@ -3034,7 +3173,38 @@ def builder_producer(feedback: str | None = None) -> BuilderArtifact: # Ensure untracked files are included in diff (for reviewer visibility) self._stage_untracked_files() - return BuilderArtifact(output, self.git("status", "--short"), self.git("diff", "HEAD")) + # Detect if builder committed its own changes (HEAD advanced) + nonlocal pre_build_head + current_head = self.git("rev-parse", "HEAD").strip() + builder_committed = current_head != pre_build_head + + if builder_committed: + progress( + f"{self._task_prefix()} Builder committed changes directly " + f"(HEAD advanced from {pre_build_head[:8]} to {current_head[:8]}). " + "Using committed diff for review." + ) + self.log( + "builder_early_commit", + old_head=pre_build_head, + new_head=current_head, + ) + # Use the diff from the builder's commit(s) for review. + # Also include any uncommitted changes on top of the builder's commits. + git_diff = self.git("diff", pre_build_head) + git_status = self.git("diff", "--stat", pre_build_head) + else: + git_diff = self.git("diff", "HEAD") + git_status = self.git("status", "--short") + + # Update pre_build_head so the next cycle (if reviewer rejects) + # correctly detects whether the builder commits again. + if builder_committed: + pre_build_head = current_head + + return BuilderArtifact( + output, git_status, git_diff, builder_committed=builder_committed + ) def builder_validator(artifact: BuilderArtifact) -> tuple[bool, str | None]: nonlocal empty_diff_retried @@ -3051,7 +3221,12 @@ def builder_validator(artifact: BuilderArtifact) -> tuple[bool, str | None]: # Retry once on empty diff before sanity check fails # This catches cases where the builder didn't make changes but should have - if not artifact.git_status.strip() and not empty_diff_retried: + # Skip nudge if builder already committed (diff extracted from commits) + if ( + not artifact.git_status.strip() + and not empty_diff_retried + and not artifact.builder_committed + ): empty_diff_retried = True progress(f"{self._task_prefix()} No changes detected, nudging builder to retry...") nudge_msg = ( @@ -3156,6 +3331,48 @@ def builder_on_success(artifact: BuilderArtifact, verdict: BuilderVerdict) -> bo loop_state["failure_reason"] = "eval_gate_failed" return False + if artifact.builder_committed: + # Builder committed during this cycle. Check if there are + # remaining uncommitted changes (e.g. from a review-fix cycle + # where the builder edited files without committing again). + worktree_status = self.git("status", "--porcelain").strip() + tasklist_path = str(self.tasklist) + non_tasklist_dirty = any( + line.split()[-1] != tasklist_path + for line in worktree_status.split("\n") + if line.strip() + ) + + if non_tasklist_dirty: + # There are uncommitted changes beyond the tasklist — + # delegate a commit for the remaining edits. + progress( + f"{self._task_prefix()} Builder committed earlier but " + "uncommitted changes remain — delegating commit." + ) + if not self.delegate_commit(): + loop_state["failure_reason"] = "commit_failed" + return False + else: + progress( + f"{self._task_prefix()} Skipping commit delegation (builder already committed)." + ) + # Auto-commit the tasklist checkbox if it's the only + # remaining dirty file (file-backed tasklist only). + if not self._auto_commit_tasklist_if_needed(): + loop_state["failure_reason"] = "tasklist_commit_failed" + return False + + # Update baseline so next task measures LoC from this commit + self._update_loc_baseline() + + if self.eval_on_commit and not self._run_eval_on_commit(task_text=task_text): + loop_state["failure_reason"] = "eval_regression" + return False + + self.accumulate_group_context(task_text, git_diff=artifact.git_diff) + return True + if self.delegate_commit(): if self.eval_on_commit and not self._run_eval_on_commit(task_text=task_text): loop_state["failure_reason"] = "eval_regression" diff --git a/tests/test_integration.py b/tests/test_integration.py index dfef638..644dbf3 100644 --- a/tests/test_integration.py +++ b/tests/test_integration.py @@ -426,6 +426,295 @@ def test_no_changes_detected(self, temp_repo, capsys): finally: orch.cleanup() + def test_builder_early_commit_detected(self, temp_repo, capsys): + """ + Scenario: Builder commits its own changes (HEAD advances). + Expected: Orchestrator detects the early commit, uses the committed + diff for review, skips delegate_commit, and succeeds. + """ + + def make_change_and_commit(repo): + """Simulate builder creating a file AND committing it.""" + (repo / "feature.py").write_text("def new_feature():\n pass\n") + _original_subprocess_run(["git", "add", "."], cwd=repo, capture_output=True) + _original_subprocess_run( + ["git", "commit", "-m", "feat: builder committed early"], + cwd=repo, + capture_output=True, + ) + + # Builder commits directly; no commit delegation needed + responses = ( + ResponseBuilder() + .on_builder(change_fn=make_change_and_commit, output="Implemented feature.") + .on_reviewer(approve=True) + .build() + ) + + orch = Orchestrator(max_tasks=1) + try: + with patch("subprocess.run", side_effect=make_mock_runner(temp_repo, responses)): + exit_code = orch.run() + + assert exit_code == 0 + captured = capsys.readouterr() + # Should detect the early commit + assert "Builder committed changes directly" in captured.out + # Should skip commit delegation + assert "Skipping commit delegation" in captured.out + assert orch.cycle == 1 + finally: + orch.cleanup() + + def test_builder_early_commit_auto_commits_tasklist(self, temp_repo, capsys): + """ + Scenario: Builder commits code but leaves the tasklist checkbox unstaged. + Expected: Orchestrator detects the early commit, auto-commits the + tasklist tick so the task won't be re-selected on the next run. + + Uses docs/tasklist.md (git-tracked) to exercise the fallback path. + """ + + def commit_code_but_leave_tasklist(repo): + """Builder commits code but modifies tasklist without staging it.""" + (repo / "feature.py").write_text("def new_feature():\n pass\n") + _original_subprocess_run(["git", "add", "feature.py"], cwd=repo, capture_output=True) + _original_subprocess_run( + ["git", "commit", "-m", "feat: builder committed early"], + cwd=repo, + capture_output=True, + ) + # Modify tasklist (check off the task) but don't stage/commit it + tasklist = repo / "docs" / "tasklist.md" + tasklist.write_text( + "# Tasklist\n\n- [x] Task 1: Do something\n- [ ] Task 2: Do another thing\n" + ) + + responses = ( + ResponseBuilder() + .on_builder(change_fn=commit_code_but_leave_tasklist, output="Implemented feature.") + .on_reviewer(approve=True) + .build() + ) + + orch = Orchestrator(max_tasks=1, tasklist="docs/tasklist.md") + try: + with patch("subprocess.run", side_effect=make_mock_runner(temp_repo, responses)): + exit_code = orch.run() + + assert exit_code == 0 + captured = capsys.readouterr() + assert "Builder committed changes directly" in captured.out + assert "Skipping commit delegation" in captured.out + # Should auto-commit the tasklist tick + assert "Auto-committed tasklist tick" in captured.out + + # Verify no uncommitted changes remain + status = _original_subprocess_run( + ["git", "status", "--porcelain"], + cwd=temp_repo, + capture_output=True, + text=True, + ) + assert status.stdout.strip() == "" + + # Verify the tasklist was actually committed + log = _original_subprocess_run( + ["git", "log", "--oneline", "-3"], + cwd=temp_repo, + capture_output=True, + text=True, + ) + assert "Mark task complete" in log.stdout + finally: + orch.cleanup() + + def test_builder_early_commit_then_review_fix_uncommitted(self, temp_repo, capsys): + """ + Scenario: Builder commits in cycle 1, reviewer rejects, builder fixes + in cycle 2 WITHOUT committing. The orchestrator must detect the dirty + worktree and delegate a commit for the remaining edits. + + Regression test for: builder_on_success skipping delegate_commit when + builder_committed was True, leaving uncommitted changes. + """ + cycle_count = [0] + + def make_change_and_commit(repo): + """Cycle 1: builder creates file AND commits it.""" + (repo / "feature.py").write_text("def new_feature():\n return 1\n") + _original_subprocess_run(["git", "add", "."], cwd=repo, capture_output=True) + _original_subprocess_run( + ["git", "commit", "-m", "feat: builder committed early"], + cwd=repo, + capture_output=True, + ) + cycle_count[0] += 1 + + def make_fix_without_commit(repo): + """Cycle 2: builder edits file but does NOT commit.""" + (repo / "feature.py").write_text("def new_feature():\n return 2 # fixed\n") + _original_subprocess_run(["git", "add", "."], cwd=repo, capture_output=True) + cycle_count[0] += 1 + + build_count = [0] + + def responses(prompt, counts): + if is_builder_prompt(prompt) or "address this review feedback" in prompt.lower(): + build_count[0] += 1 + if build_count[0] == 1: + return ("Implemented feature.", make_change_and_commit) + else: + return ("Fixed review feedback.", make_fix_without_commit) + elif is_reviewer_prompt(prompt): + if build_count[0] <= 1: + return ( + '{"status": "REQUEST_CHANGES", "review": "Return value should be 2", ' + '"summary": "Needs fix", "findings": ["Wrong return value"], ' + '"findings_by_severity": {"critical": [], "high": ["Wrong return value"], ' + '"medium": [], "low": [], "nit": []}}', + None, + ) + return ( + '{"status": "APPROVED", "review": "Looks good", "summary": "LGTM", ' + '"findings": [], "findings_by_severity": {"critical": [], "high": [], ' + '"medium": [], "low": [], "nit": []}}', + None, + ) + elif is_commit_prompt(prompt): + return ("Committed changes.", lambda repo: do_commit(repo)) + elif is_sanity_check(prompt): + return ('{"status": "OK"}', None) + return ('{"status": "OK"}', None) + + orch = Orchestrator(max_tasks=1, max_cycles=3) + try: + with patch("subprocess.run", side_effect=make_mock_runner(temp_repo, responses)): + exit_code = orch.run() + + assert exit_code == 0 + captured = capsys.readouterr() + # Cycle 1 should detect early commit + assert "Builder committed changes directly" in captured.out + # Cycle 2: builder didn't commit, so normal delegate_commit runs + assert "Delegating commit to builder" in captured.out + + # Verify no uncommitted changes remain + status = _original_subprocess_run( + ["git", "status", "--porcelain"], + cwd=temp_repo, + capture_output=True, + text=True, + ) + assert status.stdout.strip() == "", ( + "Working directory should be clean — all changes committed" + ) + finally: + orch.cleanup() + + def test_builder_early_commit_updates_loc_baseline(self, temp_repo, capsys): + """ + Scenario: Builder commits directly (early commit) in a multi-task run. + Expected: loc_baseline_ref is updated after the early-commit path so the + next task's LoC measurement starts from the new HEAD, not the old baseline. + + Regression test for: builder_on_success skipping _update_loc_baseline() + when builder_committed was True, causing stale baselines in multi-task runs. + """ + + def make_change_and_commit(repo): + (repo / "feature.py").write_text("def new_feature():\n pass\n") + _original_subprocess_run(["git", "add", "."], cwd=repo, capture_output=True) + _original_subprocess_run( + ["git", "commit", "-m", "feat: builder committed early"], + cwd=repo, + capture_output=True, + ) + + responses = ( + ResponseBuilder() + .on_builder(change_fn=make_change_and_commit, output="Implemented feature.") + .on_reviewer(approve=True) + .build() + ) + + orch = Orchestrator(max_tasks=1) + try: + with patch("subprocess.run", side_effect=make_mock_runner(temp_repo, responses)): + exit_code = orch.run() + + assert exit_code == 0 + # After the early-commit path, loc_baseline_ref should point to HEAD + current_head = _original_subprocess_run( + ["git", "rev-parse", "HEAD"], + cwd=temp_repo, + capture_output=True, + text=True, + ).stdout.strip() + assert orch.loc_baseline_ref == current_head, ( + f"loc_baseline_ref should be updated to HEAD ({current_head}) " + f"but was {orch.loc_baseline_ref}" + ) + finally: + orch.cleanup() + + def test_builder_early_commit_tasklist_autocommit_git_failure(self, temp_repo, capsys): + """ + Scenario: Builder commits code early but the tasklist auto-commit fails. + Expected: A warning is printed and the failure is not silently swallowed. + + Regression test for: _auto_commit_tasklist_if_needed ignoring git failures. + """ + + def commit_code_but_leave_tasklist(repo): + (repo / "feature.py").write_text("def new_feature():\n pass\n") + _original_subprocess_run(["git", "add", "feature.py"], cwd=repo, capture_output=True) + _original_subprocess_run( + ["git", "commit", "-m", "feat: builder committed early"], + cwd=repo, + capture_output=True, + ) + # Modify tasklist but don't stage it + tasklist = repo / "docs" / "tasklist.md" + tasklist.write_text( + "# Tasklist\n\n- [x] Task 1: Do something\n- [ ] Task 2: Do another thing\n" + ) + + responses = ( + ResponseBuilder() + .on_builder(change_fn=commit_code_but_leave_tasklist, output="Implemented feature.") + .on_reviewer(approve=True) + .build() + ) + + orch = Orchestrator(max_tasks=1, tasklist="docs/tasklist.md") + try: + # Patch subprocess.run to fail on git commit for tasklist + real_mock = make_mock_runner(temp_repo, responses) + + def mock_with_commit_failure(cmd, **kwargs): + # Intercept the tasklist auto-commit + if ( + cmd[0] == "git" + and cmd[1] == "commit" + and len(cmd) > 3 + and "Mark task complete" in cmd[3] + ): + return MagicMock(returncode=1, stdout=b"", stderr=b"commit failed") + return real_mock(cmd, **kwargs) + + with patch("subprocess.run", side_effect=mock_with_commit_failure): + exit_code = orch.run() + + # Task must fail — tasklist not marked complete means task can be + # re-selected on the next run, breaking completion semantics. + assert exit_code == 1 + captured = capsys.readouterr() + # Should report the failed tasklist commit + assert "ERROR: git commit for tasklist failed" in captured.out + finally: + orch.cleanup() + def test_sensitive_file_no_longer_halts_flow(self, temp_repo, capsys): """ Scenario: Builder modifies a sensitive file. diff --git a/tests/test_orchestrator.py b/tests/test_orchestrator.py index 314a466..4e26724 100644 --- a/tests/test_orchestrator.py +++ b/tests/test_orchestrator.py @@ -1869,6 +1869,72 @@ def test_run_single_task_research_mode_with_direct_task(self, temp_repo): finally: orch.cleanup() + def test_run_single_task_research_mode_closes_mcp_task(self, temp_repo): + """Research mode closes remote task via update_task_status for MCP providers. + + Verifies that the real remote task ID (not a local-file-derived fallback) + is passed to update_task_status. + """ + from unittest.mock import MagicMock, patch + + from millstone.artifact_providers.mcp import MCPTasklistProvider + from millstone.artifacts.models import TasklistItem, TaskStatus + + tasklist = temp_repo / "docs" / "tasklist.md" + tasklist.write_text("# Tasks\n\n- [ ] Investigate auth flow\n") + + with patch("subprocess.run") as mock_run: + mock_run.return_value = MagicMock( + returncode=0, + stdout="## FINDINGS\n\nAuth uses OAuth2\n", + stderr="", + ) + + orch = Orchestrator(tasklist="docs/tasklist.md", research=True, cli="claude") + try: + # Inject a mock MCP tasklist provider with a known remote task ID + mock_mcp = MagicMock(spec=MCPTasklistProvider) + mock_mcp.get_prompt_placeholders.return_value = {} + mock_mcp.list_tasks.return_value = [ + TasklistItem( + task_id="GH-42", + title="Investigate auth flow", + status=TaskStatus.todo, + ), + ] + orch._outer_loop_manager.tasklist_provider = mock_mcp + + result = orch.run_single_task() + assert result is True + + # MCP provider should have been asked to close the exact remote task + mock_mcp.update_task_status.assert_called_once_with("GH-42", TaskStatus.done) + finally: + orch.cleanup() + + def test_run_single_task_research_mode_direct_task_skips_mcp_close(self, temp_repo): + """Research mode with --task (direct task) does NOT attempt MCP close.""" + from unittest.mock import MagicMock, patch + + from millstone.artifact_providers.mcp import MCPTasklistProvider + + with patch("subprocess.run") as mock_run: + mock_run.return_value = MagicMock(returncode=0, stdout="Research output", stderr="") + + orch = Orchestrator(task="Check perf", research=True, cli="claude") + try: + mock_mcp = MagicMock(spec=MCPTasklistProvider) + mock_mcp.get_prompt_placeholders.return_value = {} + orch._outer_loop_manager.tasklist_provider = mock_mcp + + result = orch.run_single_task() + assert result is True + + # With --task, mark_task_complete + MCP close should be skipped + mock_mcp.update_task_status.assert_not_called() + finally: + orch.cleanup() + class TestIsApproved: """Tests for approval detection.""" @@ -15631,18 +15697,77 @@ def test_analyze_tasklist_shows_no_file_message(self, temp_repo, capsys): def test_analyze_tasklist_with_mcp_provider(self, temp_repo, capsys): """--analyze-tasklist with MCP provider shows remote info, not file error.""" + from unittest.mock import patch + from millstone.artifact_providers.mcp import MCPTasklistProvider orch = Orchestrator(dry_run=False, quiet=True) try: provider = MCPTasklistProvider("github", labels=["millstone"]) orch._outer_loop_manager.tasklist_provider = provider - result = orch.analyze_tasklist() + with patch.object(provider, "list_tasks", return_value=[]): + with patch.object(provider, "invalidate_cache"): + result = orch.analyze_tasklist() captured = capsys.readouterr() assert "Remote tasklist provider: github" in captured.out assert "Labels: millstone" in captured.out assert "Tasklist not found" not in captured.out + assert "Open tasks: 0" in captured.out + assert result["pending_count"] == 0 + assert result["total_count"] == 0 + finally: + orch.cleanup() + + def test_analyze_tasklist_with_mcp_provider_shows_open_count(self, temp_repo, capsys): + """--analyze-tasklist with MCP provider fetches and displays live task counts.""" + from unittest.mock import patch + + from millstone.artifact_providers.mcp import MCPTasklistProvider + from millstone.artifacts.models import TasklistItem, TaskStatus + + orch = Orchestrator(dry_run=False, quiet=True) + try: + provider = MCPTasklistProvider("github", labels=["millstone"]) + mock_tasks = [ + TasklistItem(task_id="1", title="Open task A", status=TaskStatus.todo), + TasklistItem(task_id="2", title="Open task B", status=TaskStatus.in_progress), + TasklistItem(task_id="3", title="Done task", status=TaskStatus.done), + TasklistItem(task_id="4", title="Blocked task", status=TaskStatus.blocked), + ] + orch._outer_loop_manager.tasklist_provider = provider + with patch.object(provider, "list_tasks", return_value=mock_tasks): + with patch.object(provider, "invalidate_cache"): + result = orch.analyze_tasklist() + + captured = capsys.readouterr() + assert "Remote tasklist provider: github" in captured.out + assert "Open tasks: 3" in captured.out + assert result["pending_count"] == 3 + assert result["completed_count"] == 1 + assert result["total_count"] == 4 + finally: + orch.cleanup() + + def test_analyze_tasklist_with_mcp_provider_fetch_error(self, temp_repo, capsys): + """--analyze-tasklist degrades gracefully when MCP fetch fails.""" + from unittest.mock import patch + + from millstone.artifact_providers.mcp import MCPTasklistProvider + + orch = Orchestrator(dry_run=False, quiet=True) + try: + provider = MCPTasklistProvider("github", labels=["millstone"]) + orch._outer_loop_manager.tasklist_provider = provider + with patch.object( + provider, "list_tasks", side_effect=RuntimeError("connection failed") + ): + with patch.object(provider, "invalidate_cache"): + result = orch.analyze_tasklist() + + captured = capsys.readouterr() + assert "Remote tasklist provider: github" in captured.out + assert "unable to fetch task count" in captured.out assert result["pending_count"] == 0 assert result["total_count"] == 0 finally: