diff --git a/src/millstone/prompts/tasklist_prompt.md b/src/millstone/prompts/tasklist_prompt.md
index 754c515..c46a2b0 100644
--- a/src/millstone/prompts/tasklist_prompt.md
+++ b/src/millstone/prompts/tasklist_prompt.md
@@ -20,6 +20,7 @@ You are an autonomous developer working in: {{WORKING_DIRECTORY}}
 - Allowed edits:
   - Code/tests needed for the chosen task.
   - The tasklist only to (a) check off the chosen task and (b) text-only coherence updates to future tasks if strictly necessary.
+- **DO NOT COMMIT**: Do not run `git commit` or `git push`. Leave changes staged or unstaged — the orchestrator commits after review.
 - **FORBIDDEN**:
   - Completing, partially completing, or reordering any other tasks.
   - Implementing future tasks.
diff --git a/src/millstone/runtime/orchestrator.py b/src/millstone/runtime/orchestrator.py
index 2d9a0f8..66ee3c2 100755
--- a/src/millstone/runtime/orchestrator.py
+++ b/src/millstone/runtime/orchestrator.py
@@ -79,6 +79,7 @@ class BuilderArtifact:
     output: str
     git_status: str
     git_diff: str
+    builder_committed: bool = False
 
 
 @dataclass
@@ -680,7 +681,14 @@ def __init__(
             else:
                 if self.roadmap:
                     print(f"Roadmap: {self.roadmap}")
-                print(f"Tasklist: {self.tasklist}")
+                from millstone.artifact_providers.mcp import MCPTasklistProvider
+
+                tl_provider = self._outer_loop_manager.tasklist_provider
+                if isinstance(tl_provider, MCPTasklistProvider):
+                    label_str = ", ".join(tl_provider._labels) if tl_provider._labels else "none"
+                    print(f"Tasklist: {tl_provider._mcp_server} (labels: {label_str})")
+                else:
+                    print(f"Tasklist: {self.tasklist}")
                 print(f"Max tasks: {self.max_tasks}")
                 if self.compact_threshold > 0:
                     print(f"Compact threshold: {self.compact_threshold}")
@@ -1682,6 +1690,71 @@ def delegate_commit(self) -> bool:
             self.last_commit_failure = failure_info
         return success
 
+    def _auto_commit_tasklist_if_needed(self) -> bool:
+        """Auto-commit the tasklist checkbox if the builder left it unstaged.
+
+        When the builder commits code early but forgets to include the tasklist
+        tick, this ensures the task is still marked complete so it won't be
+        re-selected on the next run. Only applies to file-backed tasklists.
+
+        Returns True if no action was needed or the commit succeeded.
+        Returns False if the commit failed (task not fully completed).
+        """
+        import subprocess
+
+        from millstone.artifact_providers.mcp import MCPTasklistProvider
+
+        provider = self._outer_loop_manager.tasklist_provider
+        if isinstance(provider, MCPTasklistProvider):
+            return True  # MCP tasks are marked done via API, not file commits
+
+        status = self.git("status", "--porcelain").strip()
+        if not status:
+            return True
+
+        remaining_files = [line.split()[-1] for line in status.split("\n") if line.strip()]
+        tasklist_path = str(self.tasklist)
+
+        if len(remaining_files) == 1 and remaining_files[0] == tasklist_path:
+            self.log(
+                "auto_commit_tasklist",
+                reason="builder_early_commit_forgot_tasklist",
+                file=tasklist_path,
+            )
+            add_result = subprocess.run(
+                ["git", "add", tasklist_path],
+                cwd=self.repo_dir,
+                capture_output=True,
+            )
+            if add_result.returncode != 0:
+                progress(
+                    f"{self._task_prefix()} ERROR: git add for tasklist failed "
+                    f"(rc={add_result.returncode})"
+                )
+                return False
+            commit_result = subprocess.run(
+                [
+                    "git",
+                    "commit",
+                    "-m",
+                    "Mark task complete in tasklist\n\nGenerated with millstone orchestrator",
+                ],
+                cwd=self.repo_dir,
+                capture_output=True,
+            )
+            if commit_result.returncode != 0:
+                progress(
+                    f"{self._task_prefix()} ERROR: git commit for tasklist failed "
+                    f"(rc={commit_result.returncode})"
+                )
+                return False
+            progress(
+                f"{self._task_prefix()} Auto-committed tasklist tick "
+                "(builder committed code but forgot to stage tasklist)"
+            )
+
+        return True
+
     def load_prompt(self, name: str) -> str:
         """Load a prompt file from custom dir or package resources."""
         if self._custom_prompts_dir:
@@ -1983,13 +2056,37 @@ def analyze_tasklist(self) -> dict:
             label_str = ", ".join(provider._labels) if provider._labels else "none"
             print(f"Remote tasklist provider: {provider._mcp_server}")
             print(f"  Labels: {label_str}")
+
+            # Fetch live task counts from the remote provider
+            pending_count = 0
+            completed_count = 0
+            total_count = 0
+            try:
+                if provider._agent_callback is None:
+                    provider.set_agent_callback(
+                        lambda p, **k: self.run_agent(p, role="author", **k)
+                    )
+                provider.invalidate_cache()
+                tasks = provider.list_tasks()
+                from millstone.artifacts.models import TaskStatus
+
+                for t in tasks:
+                    if t.status == TaskStatus.done:
+                        completed_count += 1
+                    else:
+                        pending_count += 1
+                total_count = len(tasks)
+                print(f"  Open tasks: {pending_count}")
+            except Exception:
+                print("  Open tasks: unable to fetch task count")
+
             print()
-            print("Task analysis is not available for remote providers.")
+            print("Detailed task analysis is not available for remote providers.")
             print("Use --dry-run to inspect prompts that would be sent.")
             return {
-                "pending_count": 0,
-                "completed_count": 0,
-                "total_count": 0,
+                "pending_count": pending_count,
+                "completed_count": completed_count,
+                "total_count": total_count,
                 "tasks": [],
                 "dependencies": [],
                 "suggested_order": [],
@@ -2861,6 +2958,7 @@ def run_single_task(self) -> bool:
         self._task_previous_diff = None
 
         # Determine task text and metadata
+        _mcp_task_item: Any = None  # Set when MCP provider supplies the current task
         if self.task:
             task_display = self.task[:50] + "..." if len(self.task) > 50 else self.task
             self.current_task_title = task_display
@@ -2870,23 +2968,52 @@ def run_single_task(self) -> bool:
             self.apply_risk_settings(task_metadata.get("risk"))
             self.current_task_group = None
         else:
-            self.current_task_title = self.extract_current_task_title() or "task"
-            task_text = self.current_task_title
-            self.apply_risk_settings(self.extract_current_task_risk())
-            self.current_task_group = self.extract_current_task_group()
+            # When the tasklist is MCP-backed, derive task title and ID from the
+            # remote provider's cached task list instead of reading a local file
+            # that may not exist or may be stale.
+            from millstone.artifact_providers.mcp import MCPTasklistProvider
+            from millstone.artifacts.models import TaskStatus
+
+            provider = self._outer_loop_manager.tasklist_provider
+            if isinstance(provider, MCPTasklistProvider):
+                cached = provider.list_tasks()
+                pending = [
+                    t for t in cached if t.status in (TaskStatus.todo, TaskStatus.in_progress)
+                ]
+                if pending:
+                    _mcp_task_item = pending[0]
+                    self.current_task_title = _mcp_task_item.title or "task"
+                    task_text = self.current_task_title
+                    self.apply_risk_settings(_mcp_task_item.risk)
+                    self.current_task_group = None
+                else:
+                    self.current_task_title = "task"
+                    task_text = self.current_task_title
+                    self.apply_risk_settings(None)
+                    self.current_task_group = None
+            else:
+                self.current_task_title = self.extract_current_task_title() or "task"
+                task_text = self.current_task_title
+                self.apply_risk_settings(self.extract_current_task_risk())
+                self.current_task_group = self.extract_current_task_group()
 
         self._current_task_text = task_text
         # Prefer canonical task_id from metadata (stable, ontology-compliant identity).
-        # In tasklist mode, use the raw block (title + indented metadata) so that
-        # "- ID: my-task" lines are visible to the parser.
-        # In --task mode, task_text already contains the full metadata block.
-        if self.task:
+        # When an MCP provider supplied the task, use its remote ID directly.
+        if _mcp_task_item is not None:
+            self._current_task_id = _mcp_task_item.task_id
+        elif self.task:
             _task_meta = self._tasklist_manager._parse_task_metadata(task_text)
+            self._current_task_id = _task_meta.get("task_id") or (
+                re.sub(r"[^a-z0-9]+", "-", task_text.lower()).strip("-")[:30] or None
+            )
         else:
+            # In tasklist mode, use the raw block (title + indented metadata) so that
+            # "- ID: my-task" lines are visible to the parser.
             _task_meta = self._tasklist_manager.extract_current_task_metadata()
-        self._current_task_id = _task_meta.get("task_id") or (
-            re.sub(r"[^a-z0-9]+", "-", task_text.lower()).strip("-")[:30] or None
-        )
+            self._current_task_id = _task_meta.get("task_id") or (
+                re.sub(r"[^a-z0-9]+", "-", task_text.lower()).strip("-")[:30] or None
+            )
 
         # Worker mode: when --shared-state-dir is set, emit result.json + heartbeats
         # for the worktree control plane to consume.
@@ -3001,6 +3128,15 @@ def _worker_finish(
             self.log("research_completed", task=task_text[:200], output_file=str(output_file))
             if not self.task:
                 self.mark_task_complete()
+                # Close remote task for MCP providers — mark_task_complete()
+                # only updates the local tasklist file, which is a no-op when
+                # the task lives on a remote backend (GitHub Issues, Linear, …).
+                from millstone.artifact_providers.mcp import MCPTasklistProvider
+                from millstone.artifacts.models import TaskStatus
+
+                provider = self._outer_loop_manager.tasklist_provider
+                if isinstance(provider, MCPTasklistProvider) and self._current_task_id:
+                    provider.update_task_status(self._current_task_id, TaskStatus.done)
             progress(f"{self._task_prefix()} Research completed -> {output_file}")
             _worker_finish("success", review_summary=builder_output[:4000])
             return True
@@ -3012,6 +3148,9 @@ def _worker_finish(
         # Track empty-diff retry state
         empty_diff_retried = False
 
+        # Record HEAD before builder runs so we can detect early commits
+        pre_build_head = self.git("rev-parse", "HEAD").strip()
+
         # Define Loop components
         def builder_producer(feedback: str | None = None) -> BuilderArtifact:
             if feedback:
@@ -3034,7 +3173,38 @@ def builder_producer(feedback: str | None = None) -> BuilderArtifact:
             # Ensure untracked files are included in diff (for reviewer visibility)
             self._stage_untracked_files()
 
-            return BuilderArtifact(output, self.git("status", "--short"), self.git("diff", "HEAD"))
+            # Detect if builder committed its own changes (HEAD advanced)
+            nonlocal pre_build_head
+            current_head = self.git("rev-parse", "HEAD").strip()
+            builder_committed = current_head != pre_build_head
+
+            if builder_committed:
+                progress(
+                    f"{self._task_prefix()} Builder committed changes directly "
+                    f"(HEAD advanced from {pre_build_head[:8]} to {current_head[:8]}). "
+                    "Using committed diff for review."
+                )
+                self.log(
+                    "builder_early_commit",
+                    old_head=pre_build_head,
+                    new_head=current_head,
+                )
+                # Use the diff from the builder's commit(s) for review.
+                # Also include any uncommitted changes on top of the builder's commits.
+                git_diff = self.git("diff", pre_build_head)
+                git_status = self.git("diff", "--stat", pre_build_head)
+            else:
+                git_diff = self.git("diff", "HEAD")
+                git_status = self.git("status", "--short")
+
+            # Update pre_build_head so the next cycle (if reviewer rejects)
+            # correctly detects whether the builder commits again.
+            if builder_committed:
+                pre_build_head = current_head
+
+            return BuilderArtifact(
+                output, git_status, git_diff, builder_committed=builder_committed
+            )
 
         def builder_validator(artifact: BuilderArtifact) -> tuple[bool, str | None]:
             nonlocal empty_diff_retried
@@ -3051,7 +3221,12 @@ def builder_validator(artifact: BuilderArtifact) -> tuple[bool, str | None]:
 
             # Retry once on empty diff before sanity check fails
             # This catches cases where the builder didn't make changes but should have
-            if not artifact.git_status.strip() and not empty_diff_retried:
+            # Skip nudge if builder already committed (diff extracted from commits)
+            if (
+                not artifact.git_status.strip()
+                and not empty_diff_retried
+                and not artifact.builder_committed
+            ):
                 empty_diff_retried = True
                 progress(f"{self._task_prefix()} No changes detected, nudging builder to retry...")
                 nudge_msg = (
@@ -3156,6 +3331,48 @@ def builder_on_success(artifact: BuilderArtifact, verdict: BuilderVerdict) -> bo
                     loop_state["failure_reason"] = "eval_gate_failed"
                     return False
 
+            if artifact.builder_committed:
+                # Builder committed during this cycle. Check if there are
+                # remaining uncommitted changes (e.g. from a review-fix cycle
+                # where the builder edited files without committing again).
+                worktree_status = self.git("status", "--porcelain").strip()
+                tasklist_path = str(self.tasklist)
+                non_tasklist_dirty = any(
+                    line.split()[-1] != tasklist_path
+                    for line in worktree_status.split("\n")
+                    if line.strip()
+                )
+
+                if non_tasklist_dirty:
+                    # There are uncommitted changes beyond the tasklist —
+                    # delegate a commit for the remaining edits.
+                    progress(
+                        f"{self._task_prefix()} Builder committed earlier but "
+                        "uncommitted changes remain — delegating commit."
+                    )
+                    if not self.delegate_commit():
+                        loop_state["failure_reason"] = "commit_failed"
+                        return False
+                else:
+                    progress(
+                        f"{self._task_prefix()} Skipping commit delegation (builder already committed)."
+                    )
+                    # Auto-commit the tasklist checkbox if it's the only
+                    # remaining dirty file (file-backed tasklist only).
+                    if not self._auto_commit_tasklist_if_needed():
+                        loop_state["failure_reason"] = "tasklist_commit_failed"
+                        return False
+
+                # Update baseline so next task measures LoC from this commit
+                self._update_loc_baseline()
+
+                if self.eval_on_commit and not self._run_eval_on_commit(task_text=task_text):
+                    loop_state["failure_reason"] = "eval_regression"
+                    return False
+
+                self.accumulate_group_context(task_text, git_diff=artifact.git_diff)
+                return True
+
             if self.delegate_commit():
                 if self.eval_on_commit and not self._run_eval_on_commit(task_text=task_text):
                     loop_state["failure_reason"] = "eval_regression"
diff --git a/tests/test_integration.py b/tests/test_integration.py
index dfef638..644dbf3 100644
--- a/tests/test_integration.py
+++ b/tests/test_integration.py
@@ -426,6 +426,295 @@ def test_no_changes_detected(self, temp_repo, capsys):
         finally:
             orch.cleanup()
 
+    def test_builder_early_commit_detected(self, temp_repo, capsys):
+        """
+        Scenario: Builder commits its own changes (HEAD advances).
+        Expected: Orchestrator detects the early commit, uses the committed
+        diff for review, skips delegate_commit, and succeeds.
+        """
+
+        def make_change_and_commit(repo):
+            """Simulate builder creating a file AND committing it."""
+            (repo / "feature.py").write_text("def new_feature():\n    pass\n")
+            _original_subprocess_run(["git", "add", "."], cwd=repo, capture_output=True)
+            _original_subprocess_run(
+                ["git", "commit", "-m", "feat: builder committed early"],
+                cwd=repo,
+                capture_output=True,
+            )
+
+        # Builder commits directly; no commit delegation needed
+        responses = (
+            ResponseBuilder()
+            .on_builder(change_fn=make_change_and_commit, output="Implemented feature.")
+            .on_reviewer(approve=True)
+            .build()
+        )
+
+        orch = Orchestrator(max_tasks=1)
+        try:
+            with patch("subprocess.run", side_effect=make_mock_runner(temp_repo, responses)):
+                exit_code = orch.run()
+
+            assert exit_code == 0
+            captured = capsys.readouterr()
+            # Should detect the early commit
+            assert "Builder committed changes directly" in captured.out
+            # Should skip commit delegation
+            assert "Skipping commit delegation" in captured.out
+            assert orch.cycle == 1
+        finally:
+            orch.cleanup()
+
+    def test_builder_early_commit_auto_commits_tasklist(self, temp_repo, capsys):
+        """
+        Scenario: Builder commits code but leaves the tasklist checkbox unstaged.
+        Expected: Orchestrator detects the early commit, auto-commits the
+        tasklist tick so the task won't be re-selected on the next run.
+
+        Uses docs/tasklist.md (git-tracked) to exercise the fallback path.
+        """
+
+        def commit_code_but_leave_tasklist(repo):
+            """Builder commits code but modifies tasklist without staging it."""
+            (repo / "feature.py").write_text("def new_feature():\n    pass\n")
+            _original_subprocess_run(["git", "add", "feature.py"], cwd=repo, capture_output=True)
+            _original_subprocess_run(
+                ["git", "commit", "-m", "feat: builder committed early"],
+                cwd=repo,
+                capture_output=True,
+            )
+            # Modify tasklist (check off the task) but don't stage/commit it
+            tasklist = repo / "docs" / "tasklist.md"
+            tasklist.write_text(
+                "# Tasklist\n\n- [x] Task 1: Do something\n- [ ] Task 2: Do another thing\n"
+            )
+
+        responses = (
+            ResponseBuilder()
+            .on_builder(change_fn=commit_code_but_leave_tasklist, output="Implemented feature.")
+            .on_reviewer(approve=True)
+            .build()
+        )
+
+        orch = Orchestrator(max_tasks=1, tasklist="docs/tasklist.md")
+        try:
+            with patch("subprocess.run", side_effect=make_mock_runner(temp_repo, responses)):
+                exit_code = orch.run()
+
+            assert exit_code == 0
+            captured = capsys.readouterr()
+            assert "Builder committed changes directly" in captured.out
+            assert "Skipping commit delegation" in captured.out
+            # Should auto-commit the tasklist tick
+            assert "Auto-committed tasklist tick" in captured.out
+
+            # Verify no uncommitted changes remain
+            status = _original_subprocess_run(
+                ["git", "status", "--porcelain"],
+                cwd=temp_repo,
+                capture_output=True,
+                text=True,
+            )
+            assert status.stdout.strip() == ""
+
+            # Verify the tasklist was actually committed
+            log = _original_subprocess_run(
+                ["git", "log", "--oneline", "-3"],
+                cwd=temp_repo,
+                capture_output=True,
+                text=True,
+            )
+            assert "Mark task complete" in log.stdout
+        finally:
+            orch.cleanup()
+
+    def test_builder_early_commit_then_review_fix_uncommitted(self, temp_repo, capsys):
+        """
+        Scenario: Builder commits in cycle 1, reviewer rejects, builder fixes
+        in cycle 2 WITHOUT committing. The orchestrator must detect the dirty
+        worktree and delegate a commit for the remaining edits.
+
+        Regression test for: builder_on_success skipping delegate_commit when
+        builder_committed was True, leaving uncommitted changes.
+        """
+        cycle_count = [0]
+
+        def make_change_and_commit(repo):
+            """Cycle 1: builder creates file AND commits it."""
+            (repo / "feature.py").write_text("def new_feature():\n    return 1\n")
+            _original_subprocess_run(["git", "add", "."], cwd=repo, capture_output=True)
+            _original_subprocess_run(
+                ["git", "commit", "-m", "feat: builder committed early"],
+                cwd=repo,
+                capture_output=True,
+            )
+            cycle_count[0] += 1
+
+        def make_fix_without_commit(repo):
+            """Cycle 2: builder edits file but does NOT commit."""
+            (repo / "feature.py").write_text("def new_feature():\n    return 2  # fixed\n")
+            _original_subprocess_run(["git", "add", "."], cwd=repo, capture_output=True)
+            cycle_count[0] += 1
+
+        build_count = [0]
+
+        def responses(prompt, counts):
+            if is_builder_prompt(prompt) or "address this review feedback" in prompt.lower():
+                build_count[0] += 1
+                if build_count[0] == 1:
+                    return ("Implemented feature.", make_change_and_commit)
+                else:
+                    return ("Fixed review feedback.", make_fix_without_commit)
+            elif is_reviewer_prompt(prompt):
+                if build_count[0] <= 1:
+                    return (
+                        '{"status": "REQUEST_CHANGES", "review": "Return value should be 2", '
+                        '"summary": "Needs fix", "findings": ["Wrong return value"], '
+                        '"findings_by_severity": {"critical": [], "high": ["Wrong return value"], '
+                        '"medium": [], "low": [], "nit": []}}',
+                        None,
+                    )
+                return (
+                    '{"status": "APPROVED", "review": "Looks good", "summary": "LGTM", '
+                    '"findings": [], "findings_by_severity": {"critical": [], "high": [], '
+                    '"medium": [], "low": [], "nit": []}}',
+                    None,
+                )
+            elif is_commit_prompt(prompt):
+                return ("Committed changes.", lambda repo: do_commit(repo))
+            elif is_sanity_check(prompt):
+                return ('{"status": "OK"}', None)
+            return ('{"status": "OK"}', None)
+
+        orch = Orchestrator(max_tasks=1, max_cycles=3)
+        try:
+            with patch("subprocess.run", side_effect=make_mock_runner(temp_repo, responses)):
+                exit_code = orch.run()
+
+            assert exit_code == 0
+            captured = capsys.readouterr()
+            # Cycle 1 should detect early commit
+            assert "Builder committed changes directly" in captured.out
+            # Cycle 2: builder didn't commit, so normal delegate_commit runs
+            assert "Delegating commit to builder" in captured.out
+
+            # Verify no uncommitted changes remain
+            status = _original_subprocess_run(
+                ["git", "status", "--porcelain"],
+                cwd=temp_repo,
+                capture_output=True,
+                text=True,
+            )
+            assert status.stdout.strip() == "", (
+                "Working directory should be clean — all changes committed"
+            )
+        finally:
+            orch.cleanup()
+
+    def test_builder_early_commit_updates_loc_baseline(self, temp_repo, capsys):
+        """
+        Scenario: Builder commits directly (early commit) in a multi-task run.
+        Expected: loc_baseline_ref is updated after the early-commit path so the
+        next task's LoC measurement starts from the new HEAD, not the old baseline.
+
+        Regression test for: builder_on_success skipping _update_loc_baseline()
+        when builder_committed was True, causing stale baselines in multi-task runs.
+        """
+
+        def make_change_and_commit(repo):
+            (repo / "feature.py").write_text("def new_feature():\n    pass\n")
+            _original_subprocess_run(["git", "add", "."], cwd=repo, capture_output=True)
+            _original_subprocess_run(
+                ["git", "commit", "-m", "feat: builder committed early"],
+                cwd=repo,
+                capture_output=True,
+            )
+
+        responses = (
+            ResponseBuilder()
+            .on_builder(change_fn=make_change_and_commit, output="Implemented feature.")
+            .on_reviewer(approve=True)
+            .build()
+        )
+
+        orch = Orchestrator(max_tasks=1)
+        try:
+            with patch("subprocess.run", side_effect=make_mock_runner(temp_repo, responses)):
+                exit_code = orch.run()
+
+            assert exit_code == 0
+            # After the early-commit path, loc_baseline_ref should point to HEAD
+            current_head = _original_subprocess_run(
+                ["git", "rev-parse", "HEAD"],
+                cwd=temp_repo,
+                capture_output=True,
+                text=True,
+            ).stdout.strip()
+            assert orch.loc_baseline_ref == current_head, (
+                f"loc_baseline_ref should be updated to HEAD ({current_head}) "
+                f"but was {orch.loc_baseline_ref}"
+            )
+        finally:
+            orch.cleanup()
+
+    def test_builder_early_commit_tasklist_autocommit_git_failure(self, temp_repo, capsys):
+        """
+        Scenario: Builder commits code early but the tasklist auto-commit fails.
+        Expected: A warning is printed and the failure is not silently swallowed.
+
+        Regression test for: _auto_commit_tasklist_if_needed ignoring git failures.
+        """
+
+        def commit_code_but_leave_tasklist(repo):
+            (repo / "feature.py").write_text("def new_feature():\n    pass\n")
+            _original_subprocess_run(["git", "add", "feature.py"], cwd=repo, capture_output=True)
+            _original_subprocess_run(
+                ["git", "commit", "-m", "feat: builder committed early"],
+                cwd=repo,
+                capture_output=True,
+            )
+            # Modify tasklist but don't stage it
+            tasklist = repo / "docs" / "tasklist.md"
+            tasklist.write_text(
+                "# Tasklist\n\n- [x] Task 1: Do something\n- [ ] Task 2: Do another thing\n"
+            )
+
+        responses = (
+            ResponseBuilder()
+            .on_builder(change_fn=commit_code_but_leave_tasklist, output="Implemented feature.")
+            .on_reviewer(approve=True)
+            .build()
+        )
+
+        orch = Orchestrator(max_tasks=1, tasklist="docs/tasklist.md")
+        try:
+            # Patch subprocess.run to fail on git commit for tasklist
+            real_mock = make_mock_runner(temp_repo, responses)
+
+            def mock_with_commit_failure(cmd, **kwargs):
+                # Intercept the tasklist auto-commit
+                if (
+                    cmd[0] == "git"
+                    and cmd[1] == "commit"
+                    and len(cmd) > 3
+                    and "Mark task complete" in cmd[3]
+                ):
+                    return MagicMock(returncode=1, stdout=b"", stderr=b"commit failed")
+                return real_mock(cmd, **kwargs)
+
+            with patch("subprocess.run", side_effect=mock_with_commit_failure):
+                exit_code = orch.run()
+
+            # Task must fail — tasklist not marked complete means task can be
+            # re-selected on the next run, breaking completion semantics.
+            assert exit_code == 1
+            captured = capsys.readouterr()
+            # Should report the failed tasklist commit
+            assert "ERROR: git commit for tasklist failed" in captured.out
+        finally:
+            orch.cleanup()
+
     def test_sensitive_file_no_longer_halts_flow(self, temp_repo, capsys):
         """
         Scenario: Builder modifies a sensitive file.
diff --git a/tests/test_orchestrator.py b/tests/test_orchestrator.py
index 314a466..4e26724 100644
--- a/tests/test_orchestrator.py
+++ b/tests/test_orchestrator.py
@@ -1869,6 +1869,72 @@ def test_run_single_task_research_mode_with_direct_task(self, temp_repo):
             finally:
                 orch.cleanup()
 
+    def test_run_single_task_research_mode_closes_mcp_task(self, temp_repo):
+        """Research mode closes remote task via update_task_status for MCP providers.
+
+        Verifies that the real remote task ID (not a local-file-derived fallback)
+        is passed to update_task_status.
+        """
+        from unittest.mock import MagicMock, patch
+
+        from millstone.artifact_providers.mcp import MCPTasklistProvider
+        from millstone.artifacts.models import TasklistItem, TaskStatus
+
+        tasklist = temp_repo / "docs" / "tasklist.md"
+        tasklist.write_text("# Tasks\n\n- [ ] Investigate auth flow\n")
+
+        with patch("subprocess.run") as mock_run:
+            mock_run.return_value = MagicMock(
+                returncode=0,
+                stdout="## FINDINGS\n\nAuth uses OAuth2\n",
+                stderr="",
+            )
+
+            orch = Orchestrator(tasklist="docs/tasklist.md", research=True, cli="claude")
+            try:
+                # Inject a mock MCP tasklist provider with a known remote task ID
+                mock_mcp = MagicMock(spec=MCPTasklistProvider)
+                mock_mcp.get_prompt_placeholders.return_value = {}
+                mock_mcp.list_tasks.return_value = [
+                    TasklistItem(
+                        task_id="GH-42",
+                        title="Investigate auth flow",
+                        status=TaskStatus.todo,
+                    ),
+                ]
+                orch._outer_loop_manager.tasklist_provider = mock_mcp
+
+                result = orch.run_single_task()
+                assert result is True
+
+                # MCP provider should have been asked to close the exact remote task
+                mock_mcp.update_task_status.assert_called_once_with("GH-42", TaskStatus.done)
+            finally:
+                orch.cleanup()
+
+    def test_run_single_task_research_mode_direct_task_skips_mcp_close(self, temp_repo):
+        """Research mode with --task (direct task) does NOT attempt MCP close."""
+        from unittest.mock import MagicMock, patch
+
+        from millstone.artifact_providers.mcp import MCPTasklistProvider
+
+        with patch("subprocess.run") as mock_run:
+            mock_run.return_value = MagicMock(returncode=0, stdout="Research output", stderr="")
+
+            orch = Orchestrator(task="Check perf", research=True, cli="claude")
+            try:
+                mock_mcp = MagicMock(spec=MCPTasklistProvider)
+                mock_mcp.get_prompt_placeholders.return_value = {}
+                orch._outer_loop_manager.tasklist_provider = mock_mcp
+
+                result = orch.run_single_task()
+                assert result is True
+
+                # With --task, mark_task_complete + MCP close should be skipped
+                mock_mcp.update_task_status.assert_not_called()
+            finally:
+                orch.cleanup()
+
 
 class TestIsApproved:
     """Tests for approval detection."""
@@ -15631,18 +15697,77 @@ def test_analyze_tasklist_shows_no_file_message(self, temp_repo, capsys):
 
     def test_analyze_tasklist_with_mcp_provider(self, temp_repo, capsys):
         """--analyze-tasklist with MCP provider shows remote info, not file error."""
+        from unittest.mock import patch
+
         from millstone.artifact_providers.mcp import MCPTasklistProvider
 
         orch = Orchestrator(dry_run=False, quiet=True)
         try:
             provider = MCPTasklistProvider("github", labels=["millstone"])
             orch._outer_loop_manager.tasklist_provider = provider
-            result = orch.analyze_tasklist()
+            with patch.object(provider, "list_tasks", return_value=[]):
+                with patch.object(provider, "invalidate_cache"):
+                    result = orch.analyze_tasklist()
 
             captured = capsys.readouterr()
             assert "Remote tasklist provider: github" in captured.out
             assert "Labels: millstone" in captured.out
             assert "Tasklist not found" not in captured.out
+            assert "Open tasks: 0" in captured.out
+            assert result["pending_count"] == 0
+            assert result["total_count"] == 0
+        finally:
+            orch.cleanup()
+
+    def test_analyze_tasklist_with_mcp_provider_shows_open_count(self, temp_repo, capsys):
+        """--analyze-tasklist with MCP provider fetches and displays live task counts."""
+        from unittest.mock import patch
+
+        from millstone.artifact_providers.mcp import MCPTasklistProvider
+        from millstone.artifacts.models import TasklistItem, TaskStatus
+
+        orch = Orchestrator(dry_run=False, quiet=True)
+        try:
+            provider = MCPTasklistProvider("github", labels=["millstone"])
+            mock_tasks = [
+                TasklistItem(task_id="1", title="Open task A", status=TaskStatus.todo),
+                TasklistItem(task_id="2", title="Open task B", status=TaskStatus.in_progress),
+                TasklistItem(task_id="3", title="Done task", status=TaskStatus.done),
+                TasklistItem(task_id="4", title="Blocked task", status=TaskStatus.blocked),
+            ]
+            orch._outer_loop_manager.tasklist_provider = provider
+            with patch.object(provider, "list_tasks", return_value=mock_tasks):
+                with patch.object(provider, "invalidate_cache"):
+                    result = orch.analyze_tasklist()
+
+            captured = capsys.readouterr()
+            assert "Remote tasklist provider: github" in captured.out
+            assert "Open tasks: 3" in captured.out
+            assert result["pending_count"] == 3
+            assert result["completed_count"] == 1
+            assert result["total_count"] == 4
+        finally:
+            orch.cleanup()
+
+    def test_analyze_tasklist_with_mcp_provider_fetch_error(self, temp_repo, capsys):
+        """--analyze-tasklist degrades gracefully when MCP fetch fails."""
+        from unittest.mock import patch
+
+        from millstone.artifact_providers.mcp import MCPTasklistProvider
+
+        orch = Orchestrator(dry_run=False, quiet=True)
+        try:
+            provider = MCPTasklistProvider("github", labels=["millstone"])
+            orch._outer_loop_manager.tasklist_provider = provider
+            with patch.object(
+                provider, "list_tasks", side_effect=RuntimeError("connection failed")
+            ):
+                with patch.object(provider, "invalidate_cache"):
+                    result = orch.analyze_tasklist()
+
+            captured = capsys.readouterr()
+            assert "Remote tasklist provider: github" in captured.out
+            assert "unable to fetch task count" in captured.out
             assert result["pending_count"] == 0
             assert result["total_count"] == 0
         finally: