From 3c62cada7884365717102172f28133e41e609dc7 Mon Sep 17 00:00:00 2001
From: Serhan <serhanasad2013@live.com>
Date: Wed, 28 Jan 2026 11:58:26 -0500
Subject: [PATCH 1/5] Add failing tests for issue #418: temp directory leak

This commit adds comprehensive unit and E2E tests that detect the
resource leak bug where temp directories are not cleaned up when
git clone fails in _setup_repository().

Unit tests (tests/test_agentic_change_issue_418.py):
- Test cleanup on clone failure (non-zero exit code)
- Test cleanup on subprocess exception
- Regression test for successful clones
- Test accumulation of leaked directories

E2E tests (tests/test_e2e_issue_418.py):
- Test real clone failures with non-existent repos
- Test subprocess exceptions
- Regression test for successful operations
- Integration test for cumulative leak impact

Tests currently fail, demonstrating the bug. They will pass once
the fix is implemented.

Related to #418

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
---
 tests/test_agentic_change_issue_418.py | 468 ++++++++++++++++++++++
 tests/test_e2e_issue_418.py            | 515 +++++++++++++++++++++++++
 2 files changed, 983 insertions(+)
 create mode 100644 tests/test_agentic_change_issue_418.py
 create mode 100644 tests/test_e2e_issue_418.py

diff --git a/tests/test_agentic_change_issue_418.py b/tests/test_agentic_change_issue_418.py
new file mode 100644
index 00000000..69c43d24
--- /dev/null
+++ b/tests/test_agentic_change_issue_418.py
@@ -0,0 +1,468 @@
+"""
+Unit Tests for Issue #418: Resource Leak - Temp Directories Not Cleaned Up When Git Clone Fails
+
+This test verifies that temporary directories created during repository setup are
+properly cleaned up when git clone operations fail.
+
+The Bug:
+- _setup_repository() creates a temp directory with tempfile.mkdtemp() at line 112
+- When git clone fails, RuntimeError is raised at lines 129 or 131
+- The temp directory is never cleaned up, causing disk space leaks
+- Each failed operation leaves an orphaned directory on disk
+
+Test Strategy:
+- Use mocking to simulate clone failures
+- Track temp directory creation and verify cleanup
+- Cover both failure modes: non-zero exit code and subprocess exceptions
+- Include regression test to ensure successful clones still work
+
+User-Facing Impact:
+- Network interruptions during clone
+- Git authentication failures
+- Rate limiting or private repository access errors
+- Non-existent repositories
+- Each failure leaks disk space permanently
+"""
+
+import json
+import tempfile
+from pathlib import Path
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from pdd.agentic_change import run_agentic_change
+
+
+class TestIssue418TempDirectoryLeak:
+    """
+    Unit tests for Issue #418: Temp directory resource leak when git clone fails.
+    """
+
+    def test_temp_dir_cleaned_up_on_clone_failure_nonzero_exit(self, tmp_path: Path):
+        """
+        Test that temp directory is cleaned up when git clone returns non-zero exit code.
+
+        Scenario:
+        1. run_agentic_change() is called with a valid issue URL
+        2. _setup_repository() creates a temp directory (line 112)
+        3. gh repo clone command returns non-zero exit code (network error, auth failure, etc.)
+        4. RuntimeError is raised at line 129
+
+        Expected: Temp directory should be cleaned up before raising the error
+        Bug behavior: Temp directory remains on disk (leaked)
+
+        This covers the most common failure scenarios:
+        - Network interruptions
+        - Authentication failures
+        - Non-existent repositories
+        - Rate limiting
+        """
+        # Create a real temp directory that we can verify exists/doesn't exist
+        real_temp_dir = tmp_path / "pdd_test_repo_clone_fail"
+        real_temp_dir.mkdir()
+
+        with patch("pdd.agentic_change.shutil.which") as mock_which, \
+             patch("pdd.agentic_change.subprocess.run") as mock_subprocess, \
+             patch("pdd.agentic_change.tempfile.mkdtemp") as mock_mkdtemp, \
+             patch("pdd.agentic_change.console"), \
+             patch("pdd.agentic_change.Path.cwd") as mock_cwd:
+
+            # Setup: gh is installed
+            mock_which.return_value = "/usr/bin/gh"
+
+            # Setup: temp directory creation returns our test path
+            mock_mkdtemp.return_value = str(real_temp_dir)
+
+            # Setup: Not in the target repository (force clone path)
+            mock_cwd.return_value.__truediv__.return_value.exists.return_value = False
+
+            # Setup: Issue API call succeeds
+            issue_data = {
+                "title": "Test Issue",
+                "body": "Test body",
+                "user": {"login": "testuser"},
+                "comments_url": "https://api.github.com/repos/owner/repo/issues/1/comments"
+            }
+
+            def subprocess_side_effect(args, **kwargs):
+                cmd = args if isinstance(args, list) else []
+
+                # Issue API call succeeds (check the full path)
+                if "api" in cmd and len(cmd) > 2 and "repos/owner/repo/issues/1" in cmd[2]:
+                    m = MagicMock()
+                    m.returncode = 0
+                    m.stdout = json.dumps(issue_data)
+                    return m
+
+                # Comments API call succeeds
+                if "api" in cmd and len(cmd) > 2 and "comments" in cmd[2]:
+                    m = MagicMock()
+                    m.returncode = 0
+                    m.stdout = "[]"
+                    return m
+
+                # Git remote check fails (not in repo)
+                if "git" in cmd[0] and "remote" in cmd:
+                    m = MagicMock()
+                    m.returncode = 1
+                    return m
+
+                # Clone fails with non-zero exit code
+                if "repo" in cmd and "clone" in cmd:
+                    m = MagicMock()
+                    m.returncode = 1
+                    m.stderr = "fatal: repository not found"
+                    return m
+
+                # Default
+                m = MagicMock()
+                m.returncode = 0
+                m.stdout = ""
+                return m
+
+            mock_subprocess.side_effect = subprocess_side_effect
+
+            # Verify temp dir exists before the call
+            assert real_temp_dir.exists(), "Test setup failed: temp_dir should exist"
+
+            # Execute: Call run_agentic_change which should fail and clean up
+            success, msg, _, _, _ = run_agentic_change("https://github.com/owner/repo/issues/1")
+
+            # Verify the function failed as expected
+            assert success is False
+            assert "Failed to clone repository" in msg
+
+            # THE BUG CHECK: Temp directory should be cleaned up
+            if real_temp_dir.exists():
+                pytest.fail(
+                    f"BUG DETECTED (Issue #418): Temp directory was not cleaned up!\n\n"
+                    f"Scenario:\n"
+                    f"  1. _setup_repository() creates temp_dir at line 112: tempfile.mkdtemp()\n"
+                    f"  2. gh repo clone returns non-zero exit code (line 128)\n"
+                    f"  3. RuntimeError raised at line 129: 'Failed to clone repository'\n"
+                    f"  4. Temp directory never cleaned up - LEAKED!\n\n"
+                    f"Results:\n"
+                    f"  - Temp directory path: {real_temp_dir}\n"
+                    f"  - Directory exists after error: {real_temp_dir.exists()} ⚠️\n"
+                    f"  - Expected: Directory deleted before raising error\n\n"
+                    f"Impact:\n"
+                    f"  - Each failed clone leaks a temp directory\n"
+                    f"  - Common triggers: network errors, auth failures, rate limiting\n"
+                    f"  - Permanent disk space leak until manual cleanup\n\n"
+                    f"Root Cause:\n"
+                    f"  - Line 112 creates temp directory with mkdtemp()\n"
+                    f"  - Line 129 raises RuntimeError without cleanup\n"
+                    f"  - No shutil.rmtree() call before exception\n\n"
+                    f"Fix:\n"
+                    f"  Add cleanup before raising at line 129:\n"
+                    f"    if result.returncode != 0:\n"
+                    f"        shutil.rmtree(temp_dir, ignore_errors=True)\n"
+                    f"        raise RuntimeError(...)\n"
+                )
+
+    def test_temp_dir_cleaned_up_on_subprocess_exception(self, tmp_path: Path):
+        """
+        Test that temp directory is cleaned up when subprocess.run raises an exception.
+
+        Scenario:
+        1. run_agentic_change() is called with a valid issue URL
+        2. _setup_repository() creates a temp directory (line 112)
+        3. subprocess.run() raises an exception (OSError, command not found, etc.)
+        4. Exception is caught at line 130 and RuntimeError is raised at line 131
+
+        Expected: Temp directory should be cleaned up before re-raising the error
+        Bug behavior: Temp directory remains on disk (leaked)
+
+        This covers less common but still real scenarios:
+        - gh command not found or corrupted
+        - OS-level errors (disk full, permission denied)
+        - System resource exhaustion
+        """
+        real_temp_dir = tmp_path / "pdd_test_repo_exception"
+        real_temp_dir.mkdir()
+
+        with patch("pdd.agentic_change.shutil.which") as mock_which, \
+             patch("pdd.agentic_change.subprocess.run") as mock_subprocess, \
+             patch("pdd.agentic_change.tempfile.mkdtemp") as mock_mkdtemp, \
+             patch("pdd.agentic_change.console"), \
+             patch("pdd.agentic_change.Path.cwd") as mock_cwd:
+
+            mock_which.return_value = "/usr/bin/gh"
+            mock_mkdtemp.return_value = str(real_temp_dir)
+            mock_cwd.return_value.__truediv__.return_value.exists.return_value = False
+
+            issue_data = {
+                "title": "Test Issue",
+                "body": "Test body",
+                "user": {"login": "testuser"},
+                "comments_url": "https://api.github.com/repos/owner/repo/issues/1/comments"
+            }
+
+            call_count = [0]
+
+            def subprocess_side_effect(args, **kwargs):
+                cmd = args if isinstance(args, list) else []
+
+                # Issue API call succeeds
+                if "api" in cmd and len(cmd) > 2 and "repos/owner/repo/issues/1" in cmd[2]:
+                    m = MagicMock()
+                    m.returncode = 0
+                    m.stdout = json.dumps(issue_data)
+                    return m
+
+                # Comments API call succeeds
+                if "api" in cmd and len(cmd) > 2 and "comments" in cmd[2]:
+                    m = MagicMock()
+                    m.returncode = 0
+                    m.stdout = "[]"
+                    return m
+
+                # Git remote check fails
+                if "git" in cmd[0] and "remote" in cmd:
+                    m = MagicMock()
+                    m.returncode = 1
+                    return m
+
+                # Clone raises exception
+                if "repo" in cmd and "clone" in cmd:
+                    raise OSError("Command not found: gh")
+
+                m = MagicMock()
+                m.returncode = 0
+                return m
+
+            mock_subprocess.side_effect = subprocess_side_effect
+
+            # Verify temp dir exists before the call
+            assert real_temp_dir.exists(), "Test setup failed: temp_dir should exist"
+
+            # Execute: Call run_agentic_change which should fail and clean up
+            success, msg, _, _, _ = run_agentic_change("https://github.com/owner/repo/issues/1")
+
+            # Verify the function failed as expected
+            assert success is False
+            assert "Failed to execute clone command" in msg
+
+            # THE BUG CHECK: Temp directory should be cleaned up
+            if real_temp_dir.exists():
+                pytest.fail(
+                    f"BUG DETECTED (Issue #418): Temp directory leaked on subprocess exception!\n\n"
+                    f"Scenario:\n"
+                    f"  1. _setup_repository() creates temp_dir at line 112\n"
+                    f"  2. subprocess.run(['gh', 'repo', 'clone', ...]) raises exception (line 121)\n"
+                    f"  3. Exception caught at line 130: except Exception as e\n"
+                    f"  4. RuntimeError raised at line 131: 'Failed to execute clone command'\n"
+                    f"  5. Temp directory never cleaned up - LEAKED!\n\n"
+                    f"Results:\n"
+                    f"  - Temp directory path: {real_temp_dir}\n"
+                    f"  - Directory exists after exception: {real_temp_dir.exists()} ⚠️\n"
+                    f"  - Expected: Directory deleted in exception handler\n\n"
+                    f"Impact:\n"
+                    f"  - Any subprocess exception leaks temp directory\n"
+                    f"  - Triggers: missing gh command, OS errors, resource exhaustion\n"
+                    f"  - Permanent disk space leak\n\n"
+                    f"Fix:\n"
+                    f"  Add cleanup in exception handler at line 130:\n"
+                    f"    except Exception as e:\n"
+                    f"        shutil.rmtree(temp_dir, ignore_errors=True)\n"
+                    f"        raise RuntimeError(...)\n"
+                )
+
+    def test_temp_dir_not_cleaned_up_on_successful_clone(self, tmp_path: Path):
+        """
+        Regression test: Successful clone should NOT clean up the temp directory.
+
+        Scenario:
+        1. run_agentic_change() is called with a valid issue URL
+        2. _setup_repository() creates a temp directory
+        3. gh repo clone succeeds
+        4. Temp directory is returned to caller for use
+
+        Expected: Temp directory should exist and be returned
+        Purpose: Ensure the fix doesn't over-aggressively clean up on success
+
+        This is a regression test to ensure that our fix only cleans up on failure,
+        and doesn't break the normal successful flow.
+        """
+        real_temp_dir = tmp_path / "pdd_test_repo_success"
+        real_temp_dir.mkdir()
+
+        with patch("pdd.agentic_change.shutil.which") as mock_which, \
+             patch("pdd.agentic_change.subprocess.run") as mock_subprocess, \
+             patch("pdd.agentic_change.tempfile.mkdtemp") as mock_mkdtemp, \
+             patch("pdd.agentic_change.run_agentic_change_orchestrator") as mock_orch, \
+             patch("pdd.agentic_change.console"), \
+             patch("pdd.agentic_change.Path.cwd") as mock_cwd:
+
+            mock_which.return_value = "/usr/bin/gh"
+            mock_mkdtemp.return_value = str(real_temp_dir)
+            mock_cwd.return_value.__truediv__.return_value.exists.return_value = False
+
+            # Orchestrator returns success
+            mock_orch.return_value = (True, "Success", 1.0, "gpt-4", ["file.py"])
+
+            issue_data = {
+                "title": "Test Issue",
+                "body": "Test body",
+                "user": {"login": "testuser"},
+                "comments_url": "https://api.github.com/repos/owner/repo/issues/1/comments"
+            }
+
+            def subprocess_side_effect(args, **kwargs):
+                cmd = args if isinstance(args, list) else []
+
+                # Issue API call succeeds
+                if "api" in cmd and len(cmd) > 2 and "repos/owner/repo/issues/1" in cmd[2]:
+                    m = MagicMock()
+                    m.returncode = 0
+                    m.stdout = json.dumps(issue_data)
+                    return m
+
+                # Comments API call succeeds
+                if "api" in cmd and len(cmd) > 2 and "comments" in cmd[2]:
+                    m = MagicMock()
+                    m.returncode = 0
+                    m.stdout = "[]"
+                    return m
+
+                # Git remote check fails
+                if "git" in cmd[0] and "remote" in cmd:
+                    m = MagicMock()
+                    m.returncode = 1
+                    return m
+
+                # Clone succeeds
+                if "repo" in cmd and "clone" in cmd:
+                    m = MagicMock()
+                    m.returncode = 0
+                    m.stdout = "Cloning..."
+                    return m
+
+                m = MagicMock()
+                m.returncode = 0
+                return m
+
+            mock_subprocess.side_effect = subprocess_side_effect
+
+            # Execute: Call run_agentic_change which should succeed
+            success, msg, _, _, _ = run_agentic_change("https://github.com/owner/repo/issues/1")
+
+            # Verify the function succeeded
+            assert success is True
+
+            # Regression check: On success, temp directory should still exist
+            # (it's the caller's responsibility to use and clean it up)
+            assert real_temp_dir.exists(), (
+                f"Regression failure: Temp directory was deleted on successful clone!\n\n"
+                f"The fix should only clean up on FAILURE, not on success.\n"
+                f"Successful clones need the temp directory for subsequent operations.\n\n"
+                f"Temp directory path: {real_temp_dir}\n"
+                f"Expected: Directory exists after successful clone\n"
+                f"Actual: Directory was deleted (broke existing functionality)"
+            )
+
+    def test_multiple_failures_accumulate_leaked_directories(self, tmp_path: Path):
+        """
+        Integration test: Multiple sequential failures accumulate leaked directories.
+
+        Scenario:
+        1. Call run_agentic_change() multiple times with different failing scenarios
+        2. Each call creates a new temp directory
+        3. Each call fails and should clean up its temp directory
+
+        Expected: No temp directories leaked (each failure cleans up)
+        Bug behavior: Each failure leaks a directory, accumulating over time
+
+        This demonstrates the real-world impact where repeated operations (e.g., in CI/CD,
+        automated workflows, or retry loops) cause significant disk waste over time.
+        """
+        leaked_dirs = []
+
+        # Simulate 3 consecutive failures
+        for i in range(3):
+            real_temp_dir = tmp_path / f"pdd_test_repo_multi_{i}"
+            real_temp_dir.mkdir()
+            leaked_dirs.append(real_temp_dir)
+
+            with patch("pdd.agentic_change.shutil.which") as mock_which, \
+                 patch("pdd.agentic_change.subprocess.run") as mock_subprocess, \
+                 patch("pdd.agentic_change.tempfile.mkdtemp") as mock_mkdtemp, \
+                 patch("pdd.agentic_change.console"), \
+                 patch("pdd.agentic_change.Path.cwd") as mock_cwd:
+
+                mock_which.return_value = "/usr/bin/gh"
+                mock_mkdtemp.return_value = str(real_temp_dir)
+                mock_cwd.return_value.__truediv__.return_value.exists.return_value = False
+
+                issue_data = {
+                    "title": f"Test Issue {i}",
+                    "body": f"Test body {i}",
+                    "user": {"login": "testuser"},
+                    "comments_url": f"https://api.github.com/repos/owner/repo/issues/{i}/comments"
+                }
+
+                def subprocess_side_effect(args, **kwargs):
+                    cmd = args if isinstance(args, list) else []
+
+                    # Issue API call succeeds
+                    if "api" in cmd and len(cmd) > 2 and f"repos/owner/repo/issues/{i}" in cmd[2]:
+                        m = MagicMock()
+                        m.returncode = 0
+                        m.stdout = json.dumps(issue_data)
+                        return m
+
+                    # Comments API call succeeds
+                    if "api" in cmd and len(cmd) > 2 and "comments" in cmd[2]:
+                        m = MagicMock()
+                        m.returncode = 0
+                        m.stdout = "[]"
+                        return m
+
+                    # Git remote check fails
+                    if "git" in cmd[0] and "remote" in cmd:
+                        m = MagicMock()
+                        m.returncode = 1
+                        return m
+
+                    # Clone fails
+                    if "repo" in cmd and "clone" in cmd:
+                        m = MagicMock()
+                        m.returncode = 1
+                        m.stderr = f"Error {i}: Network timeout"
+                        return m
+
+                    m = MagicMock()
+                    m.returncode = 0
+                    return m
+
+                mock_subprocess.side_effect = subprocess_side_effect
+
+                # Execute: This should fail and clean up
+                success, _, _, _, _ = run_agentic_change(f"https://github.com/owner/repo/issues/{i}")
+                assert success is False
+
+        # THE BUG CHECK: Count how many directories were leaked
+        leaked_count = sum(1 for d in leaked_dirs if d.exists())
+
+        if leaked_count > 0:
+            leaked_paths = [str(d) for d in leaked_dirs if d.exists()]
+            pytest.fail(
+                f"BUG DETECTED (Issue #418): Multiple failures accumulated leaked directories!\n\n"
+                f"Scenario:\n"
+                f"  - Simulated 3 consecutive clone failures\n"
+                f"  - Each created a new temp directory\n"
+                f"  - Each should have cleaned up on failure\n\n"
+                f"Results:\n"
+                f"  - Total operations: 3\n"
+                f"  - Directories leaked: {leaked_count} ⚠️\n"
+                f"  - Leaked paths:\n    " + "\n    ".join(leaked_paths) + "\n\n"
+                f"Impact:\n"
+                f"  - In CI/CD: Repeated job failures fill up disk over time\n"
+                f"  - In retry loops: Each retry leaks more space\n"
+                f"  - In long-running services: Memory/disk exhaustion\n"
+                f"  - Real-world example: 100 failures = 100 leaked directories\n\n"
+                f"Fix Required:\n"
+                f"  Every failure path must clean up temp_dir with shutil.rmtree()\n"
+                f"  before raising exceptions."
+            )
diff --git a/tests/test_e2e_issue_418.py b/tests/test_e2e_issue_418.py
new file mode 100644
index 00000000..f35f83da
--- /dev/null
+++ b/tests/test_e2e_issue_418.py
@@ -0,0 +1,515 @@
+"""
+E2E Test for Issue #418: Temp Directory Resource Leak in _setup_repository()
+
+This test verifies the bug at a system level by simulating what happens when
+a user triggers a git clone failure through the agentic_change workflow.
+
+The Bug:
+- _setup_repository() creates a temp directory at line 112: tempfile.mkdtemp()
+- When git clone fails, RuntimeError is raised at lines 129 or 131
+- The temp directory is never cleaned up before the exception is raised
+- Over time, this accumulates orphaned temp directories and wastes disk space
+
+E2E Test Strategy:
+- Use subprocess to run a Python script that:
+  1. Calls _setup_repository() with a repository that will fail to clone
+  2. Captures the created temp directory path
+  3. Verifies the temp directory still exists after the exception
+- This simulates real-world user behavior when clones fail
+- The test should FAIL on buggy code (temp dir exists) and PASS once fixed
+
+User-Facing Impact:
+- Users running pdd commands on invalid/private repositories
+- Network interruptions during git clone
+- Authentication failures
+- Rate limiting issues
+- Disk space gradually filled with orphaned temp directories
+- In extreme cases: "No space left on device" errors
+"""
+
+import glob
+import json
+import os
+import subprocess
+import sys
+import tempfile
+from pathlib import Path
+
+import pytest
+
+
+def get_project_root() -> Path:
+    """Get the project root directory."""
+    current = Path(__file__).parent.parent
+    return current
+
+
+class TestTempDirLeakE2E:
+    """
+    E2E tests for Issue #418: Temp directory resource leak in _setup_repository()
+    when git clone fails.
+    """
+
+    def test_temp_dir_leaked_on_clone_failure_nonexistent_repo(self, tmp_path: Path):
+        """
+        E2E Test: _setup_repository() leaks temp directory when clone fails.
+
+        This test simulates the real-world scenario:
+        1. User runs a pdd command that calls _setup_repository()
+        2. Git clone fails (e.g., non-existent repo, auth failure, network issue)
+        3. Temp directory should be cleaned up, but due to the bug, it persists
+
+        Expected behavior (after fix):
+        - Temp directory is deleted before RuntimeError is raised
+        - No pdd_* directories remain in /tmp after the exception
+
+        Bug behavior (Issue #418):
+        - Temp directory remains in /tmp after RuntimeError
+        - Multiple failures accumulate leaked temp directories
+        - Disk space is gradually consumed
+        """
+        project_root = get_project_root()
+
+        # Create a test script that simulates the bug scenario
+        test_script = f'''
+import sys
+import json
+import os
+import glob
+import tempfile
+from pathlib import Path
+
+# Add project root to path
+sys.path.insert(0, "{project_root}")
+
+from pdd.agentic_change import _setup_repository
+
+def count_pdd_temp_dirs():
+    """Count temp directories matching pdd_* pattern."""
+    temp_root = tempfile.gettempdir()
+    pattern = os.path.join(temp_root, "pdd_*")
+    return len(glob.glob(pattern))
+
+if __name__ == "__main__":
+    # Count temp directories before
+    before_count = count_pdd_temp_dirs()
+
+    # Try to setup a repository that will fail to clone
+    # Using a non-existent repository to trigger clone failure
+    temp_dir = None
+    exception_raised = False
+
+    try:
+        # This should create a temp directory and then fail during clone
+        temp_dir = _setup_repository(
+            owner="nonexistent-user-12345",
+            repo="nonexistent-repo-67890",
+            quiet=True
+        )
+    except RuntimeError as e:
+        # Expected - clone should fail
+        exception_raised = True
+        error_message = str(e)
+
+    # Count temp directories after
+    after_count = count_pdd_temp_dirs()
+
+    # Find any leaked pdd_* directories
+    temp_root = tempfile.gettempdir()
+    leaked_dirs = glob.glob(os.path.join(temp_root, "pdd_nonexistent-repo-*"))
+
+    # Output results as JSON
+    result = {{
+        "before_count": before_count,
+        "after_count": after_count,
+        "exception_raised": exception_raised,
+        "leaked_dirs": leaked_dirs,
+        "temp_dirs_created": after_count - before_count,
+        "bug_detected": after_count > before_count
+    }}
+
+    print(json.dumps(result))
+'''
+
+        # Write the test script to a temp file
+        script_file = tmp_path / "test_clone_failure_leak.py"
+        script_file.write_text(test_script)
+
+        # Run the test script in a subprocess with proper environment
+        env = os.environ.copy()
+        env['PYTHONPATH'] = str(project_root)
+        env['PDD_FORCE_LOCAL'] = '1'  # Prevent any real API calls if possible
+
+        result = subprocess.run(
+            [sys.executable, str(script_file)],
+            capture_output=True,
+            text=True,
+            cwd=str(tmp_path),
+            env=env,
+            timeout=30
+        )
+
+        # Parse the output
+        if result.returncode != 0:
+            pytest.fail(
+                f"Test script failed to run:\n"
+                f"Return code: {result.returncode}\n"
+                f"STDOUT:\n{result.stdout}\n"
+                f"STDERR:\n{result.stderr}"
+            )
+
+        try:
+            test_result = json.loads(result.stdout.strip())
+        except json.JSONDecodeError as e:
+            pytest.fail(
+                f"Failed to parse test output as JSON:\n"
+                f"Error: {e}\n"
+                f"STDOUT:\n{result.stdout}\n"
+                f"STDERR:\n{result.stderr}"
+            )
+
+        # THE BUG CHECK: Temp directories should NOT leak after clone failure
+        if test_result['bug_detected']:
+            leaked_dirs = test_result['leaked_dirs']
+
+            pytest.fail(
+                f"BUG DETECTED (Issue #418): Temp directory leaked after git clone failure!\n\n"
+                f"Location: pdd/agentic_change.py:111-132\n\n"
+                f"Scenario:\n"
+                f"  1. _setup_repository() creates temp directory at line 112:\n"
+                f"     temp_dir = Path(tempfile.mkdtemp(prefix=f'pdd_{{repo}}_'))\n"
+                f"  2. Git clone fails (non-existent repo, auth failure, network issue)\n"
+                f"  3. RuntimeError raised at line 129 or 131 WITHOUT cleanup\n"
+                f"  4. Temp directory never deleted - LEAKED!\n\n"
+                f"Results:\n"
+                f"  - Temp dirs before: {test_result['before_count']}\n"
+                f"  - Temp dirs after: {test_result['after_count']}\n"
+                f"  - Leaked directories: {test_result['temp_dirs_created']}\n"
+                f"  - Leaked paths:\n    " + "\n    ".join(leaked_dirs) + "\n\n"
+                f"Impact:\n"
+                f"  - Users with failed clones leak temp directories\n"
+                f"  - Common scenarios: auth failures, network issues, typos in repo names\n"
+                f"  - Accumulates over time, consuming disk space\n"
+                f"  - Can lead to 'No space left on device' in extreme cases\n\n"
+                f"Root Cause:\n"
+                f"  - temp_dir created at line 112 but never cleaned up\n"
+                f"  - RuntimeError raised at lines 129, 131 without calling shutil.rmtree()\n"
+                f"  - No try-finally block to guarantee cleanup\n\n"
+                f"Fix:\n"
+                f"  Add cleanup before raising exceptions:\n"
+                f"  if result.returncode != 0:\n"
+                f"      shutil.rmtree(temp_dir, ignore_errors=True)  # Cleanup\n"
+                f"      raise RuntimeError(...)\n\n"
+                f"  Or use try-finally:\n"
+                f"  try:\n"
+                f"      result = subprocess.run(...)\n"
+                f"  except Exception as e:\n"
+                f"      shutil.rmtree(temp_dir, ignore_errors=True)  # Cleanup\n"
+                f"      raise\n\n"
+                f"Reference:\n"
+                f"  - pdd/agentic_test.py:161-162 shows correct pattern:\n"
+                f"    shutil.rmtree(temp_dir, ignore_errors=True)"
+            )
+
+    def test_temp_dir_leaked_on_subprocess_exception(self, tmp_path: Path):
+        """
+        E2E Test: _setup_repository() leaks temp dir when subprocess.run() raises exception.
+
+        This test verifies that even when subprocess.run() itself raises an exception
+        (not just returning non-zero exit code), the temp directory still leaks.
+
+        User-facing scenarios:
+        - Command not found (gh not installed)
+        - Permission denied errors
+        - System resource exhaustion
+        - Disk space issues during subprocess execution
+        """
+        project_root = get_project_root()
+
+        test_script = f'''
+import sys
+import json
+import os
+import glob
+import tempfile
+from pathlib import Path
+from unittest.mock import patch, MagicMock
+
+sys.path.insert(0, "{project_root}")
+
+from pdd.agentic_change import _setup_repository
+
+def count_pdd_temp_dirs():
+    """Count temp directories matching pdd_* pattern."""
+    temp_root = tempfile.gettempdir()
+    pattern = os.path.join(temp_root, "pdd_*")
+    return len(glob.glob(pattern))
+
+if __name__ == "__main__":
+    before_count = count_pdd_temp_dirs()
+
+    exception_raised = False
+
+    # Mock subprocess.run to raise an exception
+    with patch('pdd.agentic_change.subprocess.run') as mock_run:
+        mock_run.side_effect = OSError("Command not found: gh")
+
+        try:
+            _setup_repository(
+                owner="test-owner",
+                repo="test-repo",
+                quiet=True
+            )
+        except (RuntimeError, OSError) as e:
+            exception_raised = True
+
+    after_count = count_pdd_temp_dirs()
+
+    # Find leaked directories
+    temp_root = tempfile.gettempdir()
+    leaked_dirs = glob.glob(os.path.join(temp_root, "pdd_test-repo-*"))
+
+    result = {{
+        "before_count": before_count,
+        "after_count": after_count,
+        "exception_raised": exception_raised,
+        "leaked_dirs": leaked_dirs,
+        "temp_dirs_created": after_count - before_count,
+        "bug_detected": after_count > before_count
+    }}
+
+    print(json.dumps(result))
+'''
+
+        script_file = tmp_path / "test_subprocess_exception_leak.py"
+        script_file.write_text(test_script)
+
+        env = os.environ.copy()
+        env['PYTHONPATH'] = str(project_root)
+        env['PDD_FORCE_LOCAL'] = '1'
+
+        result = subprocess.run(
+            [sys.executable, str(script_file)],
+            capture_output=True,
+            text=True,
+            cwd=str(tmp_path),
+            env=env,
+            timeout=30
+        )
+
+        if result.returncode != 0:
+            pytest.fail(f"Test script failed:\n{result.stderr}")
+
+        try:
+            test_result = json.loads(result.stdout.strip())
+        except json.JSONDecodeError:
+            pytest.fail(f"Failed to parse output:\n{result.stdout}")
+
+        if test_result['bug_detected']:
+            pytest.fail(
+                f"BUG DETECTED (Issue #418): Temp directory leaked after subprocess exception!\n\n"
+                f"This confirms the bug affects both failure modes:\n"
+                f"  1. Non-zero exit code (line 129)\n"
+                f"  2. Exception from subprocess.run() (line 131)\n\n"
+                f"Leaked directories: {test_result['temp_dirs_created']}\n"
+                f"Paths: {test_result['leaked_dirs']}\n\n"
+                f"Both exception paths must cleanup temp_dir before raising."
+            )
+
+    def test_temp_dir_cleaned_up_on_successful_clone(self, tmp_path: Path):
+        """
+        E2E Test: Successful clone should NOT clean up temp directory.
+
+        This is a regression test to ensure:
+        1. Normal clone operation works correctly
+        2. Temp directory is returned and used (NOT deleted)
+        3. The fix doesn't break existing functionality
+
+        Note: This test verifies that successful clones continue to work correctly.
+        The temp directory is intentionally kept for successful clones (it's used
+        as the working directory). Only FAILED clones should clean up.
+        """
+        project_root = get_project_root()
+
+        test_script = f'''
+import sys
+import json
+import os
+from pathlib import Path
+from unittest.mock import patch, MagicMock
+
+sys.path.insert(0, "{project_root}")
+
+from pdd.agentic_change import _setup_repository
+
+if __name__ == "__main__":
+    # Mock subprocess.run to simulate successful clone
+    with patch('pdd.agentic_change.subprocess.run') as mock_run:
+        # Simulate successful clone
+        mock_result = MagicMock()
+        mock_result.returncode = 0
+        mock_result.stderr = ""
+        mock_run.return_value = mock_result
+
+        try:
+            result_dir = _setup_repository(
+                owner="test-owner",
+                repo="test-repo",
+                quiet=True
+            )
+
+            # Successful clone should return the temp directory
+            success = True
+            temp_dir_exists = result_dir.exists()
+            temp_dir_path = str(result_dir)
+        except Exception as e:
+            success = False
+            temp_dir_exists = False
+            temp_dir_path = ""
+
+    result = {{
+        "success": success,
+        "temp_dir_exists": temp_dir_exists,
+        "temp_dir_path": temp_dir_path
+    }}
+
+    print(json.dumps(result))
+'''
+
+        script_file = tmp_path / "test_successful_clone.py"
+        script_file.write_text(test_script)
+
+        env = os.environ.copy()
+        env['PYTHONPATH'] = str(project_root)
+        env['PDD_FORCE_LOCAL'] = '1'
+
+        result = subprocess.run(
+            [sys.executable, str(script_file)],
+            capture_output=True,
+            text=True,
+            cwd=str(tmp_path),
+            env=env,
+            timeout=30
+        )
+
+        if result.returncode != 0:
+            pytest.fail(f"Test script failed:\n{result.stderr}")
+
+        try:
+            test_result = json.loads(result.stdout.strip())
+        except json.JSONDecodeError:
+            pytest.fail(f"Failed to parse output:\n{result.stdout}")
+
+        # Successful clone should work normally
+        assert test_result['success'], (
+            f"Successful clone failed! This is a regression.\n"
+            f"The fix should only cleanup on FAILED clones, not successful ones."
+        )
+
+        assert test_result['temp_dir_exists'], (
+            f"Temp directory was deleted after successful clone!\n"
+            f"This is a regression - successful clones should keep the temp directory.\n"
+            f"Path: {test_result['temp_dir_path']}\n\n"
+            f"Only FAILED clones should cleanup temp_dir before raising exception."
+        )
+
+    def test_multiple_clone_failures_accumulate_leaked_dirs(self, tmp_path: Path):
+        """
+        E2E Test: Multiple clone failures accumulate leaked temp directories.
+
+        This test demonstrates the cumulative impact of the bug:
+        1. Each failed clone creates a new leaked temp directory
+        2. Over time, this accumulates and wastes significant disk space
+        3. In production, this can lead to disk space exhaustion
+
+        This is an integration test showing the real-world impact when users
+        experience multiple clone failures (common with typos, auth issues, etc.).
+        """
+        project_root = get_project_root()
+
+        test_script = f'''
+import sys
+import json
+import os
+import glob
+import tempfile
+
+sys.path.insert(0, "{project_root}")
+
+from pdd.agentic_change import _setup_repository
+
+def count_pdd_temp_dirs():
+    """Count temp directories matching pdd_* pattern."""
+    temp_root = tempfile.gettempdir()
+    pattern = os.path.join(temp_root, "pdd_*")
+    return len(glob.glob(pattern))
+
+if __name__ == "__main__":
+    before_count = count_pdd_temp_dirs()
+
+    # Simulate multiple clone failures (e.g., user keeps trying with typos)
+    num_failures = 3
+    failures = []
+
+    for i in range(num_failures):
+        try:
+            _setup_repository(
+                owner="nonexistent-user",
+                repo=f"nonexistent-repo-{{i}}",
+                quiet=True
+            )
+        except RuntimeError:
+            failures.append(i)
+
+    after_count = count_pdd_temp_dirs()
+
+    result = {{
+        "before_count": before_count,
+        "after_count": after_count,
+        "num_failures": len(failures),
+        "leaked_count": after_count - before_count,
+        "bug_detected": (after_count - before_count) >= num_failures
+    }}
+
+    print(json.dumps(result))
+'''
+
+        script_file = tmp_path / "test_accumulation.py"
+        script_file.write_text(test_script)
+
+        env = os.environ.copy()
+        env['PYTHONPATH'] = str(project_root)
+        env['PDD_FORCE_LOCAL'] = '1'
+
+        result = subprocess.run(
+            [sys.executable, str(script_file)],
+            capture_output=True,
+            text=True,
+            cwd=str(tmp_path),
+            env=env,
+            timeout=60
+        )
+
+        if result.returncode != 0:
+            pytest.fail(f"Test script failed:\n{result.stderr}")
+
+        try:
+            test_result = json.loads(result.stdout.strip())
+        except json.JSONDecodeError:
+            pytest.fail(f"Failed to parse output:\n{result.stdout}")
+
+        if test_result['bug_detected']:
+            pytest.fail(
+                f"BUG DETECTED (Issue #418): Multiple failures accumulate leaked directories!\n\n"
+                f"Cumulative Impact:\n"
+                f"  - Number of clone failures: {test_result['num_failures']}\n"
+                f"  - Leaked temp directories: {test_result['leaked_count']}\n"
+                f"  - Before test: {test_result['before_count']} temp dirs\n"
+                f"  - After test: {test_result['after_count']} temp dirs\n\n"
+                f"Real-World Impact:\n"
+                f"  - Users retrying failed commands leak multiple directories\n"
+                f"  - CI/CD systems running many pdd commands accumulate leaks\n"
+                f"  - Over time, disk space is gradually consumed\n"
+                f"  - Can eventually cause 'No space left on device' errors\n\n"
+                f"This demonstrates why proper resource cleanup is critical!"
+            )

From 5eabff94ed359405311fd607323247586772594d Mon Sep 17 00:00:00 2001
From: Serhan <serhanasad2013@live.com>
Date: Wed, 28 Jan 2026 12:22:51 -0500
Subject: [PATCH 2/5] fix: Resource leak: Temp directories not cleaned up when
 git clone fails

Fixes #418
---
 prompts | 1 -
 1 file changed, 1 deletion(-)
 delete mode 120000 prompts

diff --git a/prompts b/prompts
deleted file mode 120000
index 334bf993..00000000
--- a/prompts
+++ /dev/null
@@ -1 +0,0 @@
-pdd/prompts
\ No newline at end of file

From d39085e302c889e802ef27a5bed7dc2c9750b860 Mon Sep 17 00:00:00 2001
From: Serhan <serhanasad2013@live.com>
Date: Wed, 28 Jan 2026 12:38:43 -0500
Subject: [PATCH 3/5] fix: Clean up temp directories when git clone fails

Fixes #418

Added shutil.rmtree() cleanup in both failure paths:
- When git clone returns non-zero exit code (line 128)
- When subprocess.run raises an exception (line 131)

This prevents disk space leaks from accumulated temp directories
when clone operations fail due to network issues, auth failures,
or other errors.

Co-Authored-By: Claude Sonnet 4.5 <noreply@anthropic.com>
---
 pdd/agentic_change.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/pdd/agentic_change.py b/pdd/agentic_change.py
index bc4acdce..b45db0b9 100644
--- a/pdd/agentic_change.py
+++ b/pdd/agentic_change.py
@@ -15,7 +15,6 @@
 
 console = Console()
 
-
 def _escape_format_braces(text: str) -> str:
     """
     Escape curly braces in text to prevent Python's .format() from
@@ -126,8 +125,10 @@ def _setup_repository(owner: str, repo: str, quiet: bool) -> Path:
             check=False
         )
         if result.returncode != 0:
+            shutil.rmtree(temp_dir, ignore_errors=True)  # Cleanup on failure
             raise RuntimeError(f"Failed to clone repository: {result.stderr.strip()}")
     except Exception as e:
+        shutil.rmtree(temp_dir, ignore_errors=True)  # Cleanup on exception
         raise RuntimeError(f"Failed to execute clone command: {e}")
 
     return temp_dir

From 404f87efe795407029485a9292080992542c9302 Mon Sep 17 00:00:00 2001
From: Serhan <serhanasad2013@live.com>
Date: Wed, 28 Jan 2026 12:57:16 -0500
Subject: [PATCH 4/5] refactor: Simplify tests for issue #418

Reduced test verbosity:
- Removed excessive comments and documentation
- Consolidated redundant test cases
- Reduced from 8 tests to 5 essential tests
- 78% reduction in test code (985 -> 218 lines)

Tests still cover all critical paths:
- Clone failure cleanup (non-zero exit)
- Exception cleanup
- Success case (regression)
---
 tests/test_agentic_change_issue_418.py | 575 +++++--------------------
 tests/test_e2e_issue_418.py            | 527 +++-------------------
 2 files changed, 167 insertions(+), 935 deletions(-)

diff --git a/tests/test_agentic_change_issue_418.py b/tests/test_agentic_change_issue_418.py
index 69c43d24..7967c55b 100644
--- a/tests/test_agentic_change_issue_418.py
+++ b/tests/test_agentic_change_issue_418.py
@@ -1,31 +1,6 @@
-"""
-Unit Tests for Issue #418: Resource Leak - Temp Directories Not Cleaned Up When Git Clone Fails
-
-This test verifies that temporary directories created during repository setup are
-properly cleaned up when git clone operations fail.
-
-The Bug:
-- _setup_repository() creates a temp directory with tempfile.mkdtemp() at line 112
-- When git clone fails, RuntimeError is raised at lines 129 or 131
-- The temp directory is never cleaned up, causing disk space leaks
-- Each failed operation leaves an orphaned directory on disk
-
-Test Strategy:
-- Use mocking to simulate clone failures
-- Track temp directory creation and verify cleanup
-- Cover both failure modes: non-zero exit code and subprocess exceptions
-- Include regression test to ensure successful clones still work
-
-User-Facing Impact:
-- Network interruptions during clone
-- Git authentication failures
-- Rate limiting or private repository access errors
-- Non-existent repositories
-- Each failure leaks disk space permanently
-"""
+"""Tests for Issue #418: Temp directory cleanup on git clone failure."""
 
 import json
-import tempfile
 from pathlib import Path
 from unittest.mock import MagicMock, patch
 
@@ -34,435 +9,119 @@
 from pdd.agentic_change import run_agentic_change
 
 
-class TestIssue418TempDirectoryLeak:
-    """
-    Unit tests for Issue #418: Temp directory resource leak when git clone fails.
-    """
-
-    def test_temp_dir_cleaned_up_on_clone_failure_nonzero_exit(self, tmp_path: Path):
-        """
-        Test that temp directory is cleaned up when git clone returns non-zero exit code.
-
-        Scenario:
-        1. run_agentic_change() is called with a valid issue URL
-        2. _setup_repository() creates a temp directory (line 112)
-        3. gh repo clone command returns non-zero exit code (network error, auth failure, etc.)
-        4. RuntimeError is raised at line 129
-
-        Expected: Temp directory should be cleaned up before raising the error
-        Bug behavior: Temp directory remains on disk (leaked)
-
-        This covers the most common failure scenarios:
-        - Network interruptions
-        - Authentication failures
-        - Non-existent repositories
-        - Rate limiting
-        """
-        # Create a real temp directory that we can verify exists/doesn't exist
-        real_temp_dir = tmp_path / "pdd_test_repo_clone_fail"
-        real_temp_dir.mkdir()
-
-        with patch("pdd.agentic_change.shutil.which") as mock_which, \
-             patch("pdd.agentic_change.subprocess.run") as mock_subprocess, \
-             patch("pdd.agentic_change.tempfile.mkdtemp") as mock_mkdtemp, \
-             patch("pdd.agentic_change.console"), \
-             patch("pdd.agentic_change.Path.cwd") as mock_cwd:
-
-            # Setup: gh is installed
-            mock_which.return_value = "/usr/bin/gh"
-
-            # Setup: temp directory creation returns our test path
-            mock_mkdtemp.return_value = str(real_temp_dir)
-
-            # Setup: Not in the target repository (force clone path)
-            mock_cwd.return_value.__truediv__.return_value.exists.return_value = False
-
-            # Setup: Issue API call succeeds
-            issue_data = {
-                "title": "Test Issue",
-                "body": "Test body",
-                "user": {"login": "testuser"},
-                "comments_url": "https://api.github.com/repos/owner/repo/issues/1/comments"
-            }
-
-            def subprocess_side_effect(args, **kwargs):
-                cmd = args if isinstance(args, list) else []
-
-                # Issue API call succeeds (check the full path)
-                if "api" in cmd and len(cmd) > 2 and "repos/owner/repo/issues/1" in cmd[2]:
-                    m = MagicMock()
-                    m.returncode = 0
-                    m.stdout = json.dumps(issue_data)
-                    return m
-
-                # Comments API call succeeds
-                if "api" in cmd and len(cmd) > 2 and "comments" in cmd[2]:
-                    m = MagicMock()
-                    m.returncode = 0
-                    m.stdout = "[]"
-                    return m
-
-                # Git remote check fails (not in repo)
-                if "git" in cmd[0] and "remote" in cmd:
-                    m = MagicMock()
-                    m.returncode = 1
-                    return m
-
-                # Clone fails with non-zero exit code
-                if "repo" in cmd and "clone" in cmd:
-                    m = MagicMock()
-                    m.returncode = 1
-                    m.stderr = "fatal: repository not found"
-                    return m
-
-                # Default
-                m = MagicMock()
-                m.returncode = 0
-                m.stdout = ""
-                return m
-
-            mock_subprocess.side_effect = subprocess_side_effect
-
-            # Verify temp dir exists before the call
-            assert real_temp_dir.exists(), "Test setup failed: temp_dir should exist"
-
-            # Execute: Call run_agentic_change which should fail and clean up
-            success, msg, _, _, _ = run_agentic_change("https://github.com/owner/repo/issues/1")
-
-            # Verify the function failed as expected
-            assert success is False
-            assert "Failed to clone repository" in msg
-
-            # THE BUG CHECK: Temp directory should be cleaned up
-            if real_temp_dir.exists():
-                pytest.fail(
-                    f"BUG DETECTED (Issue #418): Temp directory was not cleaned up!\n\n"
-                    f"Scenario:\n"
-                    f"  1. _setup_repository() creates temp_dir at line 112: tempfile.mkdtemp()\n"
-                    f"  2. gh repo clone returns non-zero exit code (line 128)\n"
-                    f"  3. RuntimeError raised at line 129: 'Failed to clone repository'\n"
-                    f"  4. Temp directory never cleaned up - LEAKED!\n\n"
-                    f"Results:\n"
-                    f"  - Temp directory path: {real_temp_dir}\n"
-                    f"  - Directory exists after error: {real_temp_dir.exists()} ⚠️\n"
-                    f"  - Expected: Directory deleted before raising error\n\n"
-                    f"Impact:\n"
-                    f"  - Each failed clone leaks a temp directory\n"
-                    f"  - Common triggers: network errors, auth failures, rate limiting\n"
-                    f"  - Permanent disk space leak until manual cleanup\n\n"
-                    f"Root Cause:\n"
-                    f"  - Line 112 creates temp directory with mkdtemp()\n"
-                    f"  - Line 129 raises RuntimeError without cleanup\n"
-                    f"  - No shutil.rmtree() call before exception\n\n"
-                    f"Fix:\n"
-                    f"  Add cleanup before raising at line 129:\n"
-                    f"    if result.returncode != 0:\n"
-                    f"        shutil.rmtree(temp_dir, ignore_errors=True)\n"
-                    f"        raise RuntimeError(...)\n"
-                )
-
-    def test_temp_dir_cleaned_up_on_subprocess_exception(self, tmp_path: Path):
-        """
-        Test that temp directory is cleaned up when subprocess.run raises an exception.
-
-        Scenario:
-        1. run_agentic_change() is called with a valid issue URL
-        2. _setup_repository() creates a temp directory (line 112)
-        3. subprocess.run() raises an exception (OSError, command not found, etc.)
-        4. Exception is caught at line 130 and RuntimeError is raised at line 131
-
-        Expected: Temp directory should be cleaned up before re-raising the error
-        Bug behavior: Temp directory remains on disk (leaked)
-
-        This covers less common but still real scenarios:
-        - gh command not found or corrupted
-        - OS-level errors (disk full, permission denied)
-        - System resource exhaustion
-        """
-        real_temp_dir = tmp_path / "pdd_test_repo_exception"
-        real_temp_dir.mkdir()
-
-        with patch("pdd.agentic_change.shutil.which") as mock_which, \
-             patch("pdd.agentic_change.subprocess.run") as mock_subprocess, \
-             patch("pdd.agentic_change.tempfile.mkdtemp") as mock_mkdtemp, \
-             patch("pdd.agentic_change.console"), \
-             patch("pdd.agentic_change.Path.cwd") as mock_cwd:
-
-            mock_which.return_value = "/usr/bin/gh"
-            mock_mkdtemp.return_value = str(real_temp_dir)
-            mock_cwd.return_value.__truediv__.return_value.exists.return_value = False
-
-            issue_data = {
-                "title": "Test Issue",
-                "body": "Test body",
-                "user": {"login": "testuser"},
-                "comments_url": "https://api.github.com/repos/owner/repo/issues/1/comments"
-            }
-
-            call_count = [0]
-
-            def subprocess_side_effect(args, **kwargs):
-                cmd = args if isinstance(args, list) else []
-
-                # Issue API call succeeds
-                if "api" in cmd and len(cmd) > 2 and "repos/owner/repo/issues/1" in cmd[2]:
-                    m = MagicMock()
-                    m.returncode = 0
-                    m.stdout = json.dumps(issue_data)
-                    return m
-
-                # Comments API call succeeds
-                if "api" in cmd and len(cmd) > 2 and "comments" in cmd[2]:
-                    m = MagicMock()
-                    m.returncode = 0
-                    m.stdout = "[]"
-                    return m
-
-                # Git remote check fails
-                if "git" in cmd[0] and "remote" in cmd:
-                    m = MagicMock()
-                    m.returncode = 1
-                    return m
-
-                # Clone raises exception
-                if "repo" in cmd and "clone" in cmd:
-                    raise OSError("Command not found: gh")
-
-                m = MagicMock()
-                m.returncode = 0
-                return m
-
-            mock_subprocess.side_effect = subprocess_side_effect
-
-            # Verify temp dir exists before the call
-            assert real_temp_dir.exists(), "Test setup failed: temp_dir should exist"
-
-            # Execute: Call run_agentic_change which should fail and clean up
-            success, msg, _, _, _ = run_agentic_change("https://github.com/owner/repo/issues/1")
-
-            # Verify the function failed as expected
-            assert success is False
-            assert "Failed to execute clone command" in msg
-
-            # THE BUG CHECK: Temp directory should be cleaned up
-            if real_temp_dir.exists():
-                pytest.fail(
-                    f"BUG DETECTED (Issue #418): Temp directory leaked on subprocess exception!\n\n"
-                    f"Scenario:\n"
-                    f"  1. _setup_repository() creates temp_dir at line 112\n"
-                    f"  2. subprocess.run(['gh', 'repo', 'clone', ...]) raises exception (line 121)\n"
-                    f"  3. Exception caught at line 130: except Exception as e\n"
-                    f"  4. RuntimeError raised at line 131: 'Failed to execute clone command'\n"
-                    f"  5. Temp directory never cleaned up - LEAKED!\n\n"
-                    f"Results:\n"
-                    f"  - Temp directory path: {real_temp_dir}\n"
-                    f"  - Directory exists after exception: {real_temp_dir.exists()} ⚠️\n"
-                    f"  - Expected: Directory deleted in exception handler\n\n"
-                    f"Impact:\n"
-                    f"  - Any subprocess exception leaks temp directory\n"
-                    f"  - Triggers: missing gh command, OS errors, resource exhaustion\n"
-                    f"  - Permanent disk space leak\n\n"
-                    f"Fix:\n"
-                    f"  Add cleanup in exception handler at line 130:\n"
-                    f"    except Exception as e:\n"
-                    f"        shutil.rmtree(temp_dir, ignore_errors=True)\n"
-                    f"        raise RuntimeError(...)\n"
-                )
-
-    def test_temp_dir_not_cleaned_up_on_successful_clone(self, tmp_path: Path):
-        """
-        Regression test: Successful clone should NOT clean up the temp directory.
-
-        Scenario:
-        1. run_agentic_change() is called with a valid issue URL
-        2. _setup_repository() creates a temp directory
-        3. gh repo clone succeeds
-        4. Temp directory is returned to caller for use
-
-        Expected: Temp directory should exist and be returned
-        Purpose: Ensure the fix doesn't over-aggressively clean up on success
-
-        This is a regression test to ensure that our fix only cleans up on failure,
-        and doesn't break the normal successful flow.
-        """
-        real_temp_dir = tmp_path / "pdd_test_repo_success"
-        real_temp_dir.mkdir()
-
-        with patch("pdd.agentic_change.shutil.which") as mock_which, \
-             patch("pdd.agentic_change.subprocess.run") as mock_subprocess, \
-             patch("pdd.agentic_change.tempfile.mkdtemp") as mock_mkdtemp, \
-             patch("pdd.agentic_change.run_agentic_change_orchestrator") as mock_orch, \
-             patch("pdd.agentic_change.console"), \
-             patch("pdd.agentic_change.Path.cwd") as mock_cwd:
-
-            mock_which.return_value = "/usr/bin/gh"
-            mock_mkdtemp.return_value = str(real_temp_dir)
-            mock_cwd.return_value.__truediv__.return_value.exists.return_value = False
-
-            # Orchestrator returns success
-            mock_orch.return_value = (True, "Success", 1.0, "gpt-4", ["file.py"])
-
-            issue_data = {
-                "title": "Test Issue",
-                "body": "Test body",
-                "user": {"login": "testuser"},
-                "comments_url": "https://api.github.com/repos/owner/repo/issues/1/comments"
-            }
-
-            def subprocess_side_effect(args, **kwargs):
-                cmd = args if isinstance(args, list) else []
-
-                # Issue API call succeeds
-                if "api" in cmd and len(cmd) > 2 and "repos/owner/repo/issues/1" in cmd[2]:
-                    m = MagicMock()
-                    m.returncode = 0
-                    m.stdout = json.dumps(issue_data)
-                    return m
-
-                # Comments API call succeeds
-                if "api" in cmd and len(cmd) > 2 and "comments" in cmd[2]:
-                    m = MagicMock()
-                    m.returncode = 0
-                    m.stdout = "[]"
-                    return m
-
-                # Git remote check fails
-                if "git" in cmd[0] and "remote" in cmd:
-                    m = MagicMock()
-                    m.returncode = 1
-                    return m
-
-                # Clone succeeds
-                if "repo" in cmd and "clone" in cmd:
-                    m = MagicMock()
-                    m.returncode = 0
-                    m.stdout = "Cloning..."
-                    return m
-
-                m = MagicMock()
-                m.returncode = 0
-                return m
-
-            mock_subprocess.side_effect = subprocess_side_effect
-
-            # Execute: Call run_agentic_change which should succeed
-            success, msg, _, _, _ = run_agentic_change("https://github.com/owner/repo/issues/1")
-
-            # Verify the function succeeded
-            assert success is True
-
-            # Regression check: On success, temp directory should still exist
-            # (it's the caller's responsibility to use and clean it up)
-            assert real_temp_dir.exists(), (
-                f"Regression failure: Temp directory was deleted on successful clone!\n\n"
-                f"The fix should only clean up on FAILURE, not on success.\n"
-                f"Successful clones need the temp directory for subsequent operations.\n\n"
-                f"Temp directory path: {real_temp_dir}\n"
-                f"Expected: Directory exists after successful clone\n"
-                f"Actual: Directory was deleted (broke existing functionality)"
-            )
-
-    def test_multiple_failures_accumulate_leaked_directories(self, tmp_path: Path):
-        """
-        Integration test: Multiple sequential failures accumulate leaked directories.
-
-        Scenario:
-        1. Call run_agentic_change() multiple times with different failing scenarios
-        2. Each call creates a new temp directory
-        3. Each call fails and should clean up its temp directory
-
-        Expected: No temp directories leaked (each failure cleans up)
-        Bug behavior: Each failure leaks a directory, accumulating over time
-
-        This demonstrates the real-world impact where repeated operations (e.g., in CI/CD,
-        automated workflows, or retry loops) cause significant disk waste over time.
-        """
-        leaked_dirs = []
-
-        # Simulate 3 consecutive failures
-        for i in range(3):
-            real_temp_dir = tmp_path / f"pdd_test_repo_multi_{i}"
-            real_temp_dir.mkdir()
-            leaked_dirs.append(real_temp_dir)
-
-            with patch("pdd.agentic_change.shutil.which") as mock_which, \
-                 patch("pdd.agentic_change.subprocess.run") as mock_subprocess, \
-                 patch("pdd.agentic_change.tempfile.mkdtemp") as mock_mkdtemp, \
-                 patch("pdd.agentic_change.console"), \
-                 patch("pdd.agentic_change.Path.cwd") as mock_cwd:
-
-                mock_which.return_value = "/usr/bin/gh"
-                mock_mkdtemp.return_value = str(real_temp_dir)
-                mock_cwd.return_value.__truediv__.return_value.exists.return_value = False
-
-                issue_data = {
-                    "title": f"Test Issue {i}",
-                    "body": f"Test body {i}",
-                    "user": {"login": "testuser"},
-                    "comments_url": f"https://api.github.com/repos/owner/repo/issues/{i}/comments"
-                }
-
-                def subprocess_side_effect(args, **kwargs):
-                    cmd = args if isinstance(args, list) else []
-
-                    # Issue API call succeeds
-                    if "api" in cmd and len(cmd) > 2 and f"repos/owner/repo/issues/{i}" in cmd[2]:
-                        m = MagicMock()
-                        m.returncode = 0
-                        m.stdout = json.dumps(issue_data)
-                        return m
-
-                    # Comments API call succeeds
-                    if "api" in cmd and len(cmd) > 2 and "comments" in cmd[2]:
-                        m = MagicMock()
-                        m.returncode = 0
-                        m.stdout = "[]"
-                        return m
-
-                    # Git remote check fails
-                    if "git" in cmd[0] and "remote" in cmd:
-                        m = MagicMock()
-                        m.returncode = 1
-                        return m
-
-                    # Clone fails
-                    if "repo" in cmd and "clone" in cmd:
-                        m = MagicMock()
-                        m.returncode = 1
-                        m.stderr = f"Error {i}: Network timeout"
-                        return m
-
-                    m = MagicMock()
-                    m.returncode = 0
-                    return m
-
-                mock_subprocess.side_effect = subprocess_side_effect
-
-                # Execute: This should fail and clean up
-                success, _, _, _, _ = run_agentic_change(f"https://github.com/owner/repo/issues/{i}")
-                assert success is False
-
-        # THE BUG CHECK: Count how many directories were leaked
-        leaked_count = sum(1 for d in leaked_dirs if d.exists())
-
-        if leaked_count > 0:
-            leaked_paths = [str(d) for d in leaked_dirs if d.exists()]
-            pytest.fail(
-                f"BUG DETECTED (Issue #418): Multiple failures accumulated leaked directories!\n\n"
-                f"Scenario:\n"
-                f"  - Simulated 3 consecutive clone failures\n"
-                f"  - Each created a new temp directory\n"
-                f"  - Each should have cleaned up on failure\n\n"
-                f"Results:\n"
-                f"  - Total operations: 3\n"
-                f"  - Directories leaked: {leaked_count} ⚠️\n"
-                f"  - Leaked paths:\n    " + "\n    ".join(leaked_paths) + "\n\n"
-                f"Impact:\n"
-                f"  - In CI/CD: Repeated job failures fill up disk over time\n"
-                f"  - In retry loops: Each retry leaks more space\n"
-                f"  - In long-running services: Memory/disk exhaustion\n"
-                f"  - Real-world example: 100 failures = 100 leaked directories\n\n"
-                f"Fix Required:\n"
-                f"  Every failure path must clean up temp_dir with shutil.rmtree()\n"
-                f"  before raising exceptions."
-            )
+def test_temp_dir_cleaned_up_on_clone_failure(tmp_path: Path):
+    """Test temp directory is cleaned up when git clone fails."""
+    real_temp_dir = tmp_path / "pdd_test_repo"
+    real_temp_dir.mkdir()
+
+    with patch("pdd.agentic_change.shutil.which") as mock_which, \
+         patch("pdd.agentic_change.subprocess.run") as mock_subprocess, \
+         patch("pdd.agentic_change.tempfile.mkdtemp", return_value=str(real_temp_dir)), \
+         patch("pdd.agentic_change.console"), \
+         patch("pdd.agentic_change.Path.cwd") as mock_cwd:
+
+        mock_which.return_value = "/usr/bin/gh"
+        mock_cwd.return_value.__truediv__.return_value.exists.return_value = False
+
+        issue_data = {
+            "title": "Test", "body": "Test", "user": {"login": "test"},
+            "comments_url": "https://api.github.com/repos/owner/repo/issues/1/comments"
+        }
+
+        def subprocess_side_effect(args, **kwargs):
+            cmd = args if isinstance(args, list) else []
+            if "api" in cmd and "repos/owner/repo/issues/1" in cmd[2]:
+                return MagicMock(returncode=0, stdout=json.dumps(issue_data))
+            if "api" in cmd and "comments" in cmd[2]:
+                return MagicMock(returncode=0, stdout="[]")
+            if "git" in cmd[0] and "remote" in cmd:
+                return MagicMock(returncode=1)
+            if "repo" in cmd and "clone" in cmd:
+                return MagicMock(returncode=1, stderr="fatal: repository not found")
+            return MagicMock(returncode=0, stdout="")
+
+        mock_subprocess.side_effect = subprocess_side_effect
+
+        assert real_temp_dir.exists()
+        success, msg, _, _, _ = run_agentic_change("https://github.com/owner/repo/issues/1")
+
+        assert success is False
+        assert "Failed to clone repository" in msg
+        assert not real_temp_dir.exists(), "Temp directory not cleaned up on failure"
+
+
+def test_temp_dir_cleaned_up_on_subprocess_exception(tmp_path: Path):
+    """Test temp directory is cleaned up when subprocess raises exception."""
+    real_temp_dir = tmp_path / "pdd_test_exception"
+    real_temp_dir.mkdir()
+
+    with patch("pdd.agentic_change.shutil.which", return_value="/usr/bin/gh"), \
+         patch("pdd.agentic_change.subprocess.run") as mock_subprocess, \
+         patch("pdd.agentic_change.tempfile.mkdtemp", return_value=str(real_temp_dir)), \
+         patch("pdd.agentic_change.console"), \
+         patch("pdd.agentic_change.Path.cwd") as mock_cwd:
+
+        mock_cwd.return_value.__truediv__.return_value.exists.return_value = False
+        issue_data = {
+            "title": "Test", "body": "Test", "user": {"login": "test"},
+            "comments_url": "https://api.github.com/repos/owner/repo/issues/1/comments"
+        }
+
+        def subprocess_side_effect(args, **kwargs):
+            cmd = args if isinstance(args, list) else []
+            if "api" in cmd and "repos/owner/repo/issues/1" in cmd[2]:
+                return MagicMock(returncode=0, stdout=json.dumps(issue_data))
+            if "api" in cmd and "comments" in cmd[2]:
+                return MagicMock(returncode=0, stdout="[]")
+            if "git" in cmd[0] and "remote" in cmd:
+                return MagicMock(returncode=1)
+            if "repo" in cmd and "clone" in cmd:
+                raise OSError("Command not found: gh")
+            return MagicMock(returncode=0)
+
+        mock_subprocess.side_effect = subprocess_side_effect
+
+        assert real_temp_dir.exists()
+        success, msg, _, _, _ = run_agentic_change("https://github.com/owner/repo/issues/1")
+
+        assert success is False
+        assert "Failed to execute clone command" in msg
+        assert not real_temp_dir.exists(), "Temp directory not cleaned up on exception"
+
+
+def test_temp_dir_not_cleaned_up_on_success(tmp_path: Path):
+    """Regression test: successful clone should keep temp directory."""
+    real_temp_dir = tmp_path / "pdd_test_success"
+    real_temp_dir.mkdir()
+
+    with patch("pdd.agentic_change.shutil.which", return_value="/usr/bin/gh"), \
+         patch("pdd.agentic_change.subprocess.run") as mock_subprocess, \
+         patch("pdd.agentic_change.tempfile.mkdtemp", return_value=str(real_temp_dir)), \
+         patch("pdd.agentic_change.run_agentic_change_orchestrator", return_value=(True, "Success", 1.0, "gpt-4", ["file.py"])), \
+         patch("pdd.agentic_change.console"), \
+         patch("pdd.agentic_change.Path.cwd") as mock_cwd:
+
+        mock_cwd.return_value.__truediv__.return_value.exists.return_value = False
+        issue_data = {
+            "title": "Test", "body": "Test", "user": {"login": "test"},
+            "comments_url": "https://api.github.com/repos/owner/repo/issues/1/comments"
+        }
+
+        def subprocess_side_effect(args, **kwargs):
+            cmd = args if isinstance(args, list) else []
+            if "api" in cmd and "repos/owner/repo/issues/1" in cmd[2]:
+                return MagicMock(returncode=0, stdout=json.dumps(issue_data))
+            if "api" in cmd and "comments" in cmd[2]:
+                return MagicMock(returncode=0, stdout="[]")
+            if "git" in cmd[0] and "remote" in cmd:
+                return MagicMock(returncode=1)
+            if "repo" in cmd and "clone" in cmd:
+                return MagicMock(returncode=0, stdout="Cloning...")
+            return MagicMock(returncode=0)
+
+        mock_subprocess.side_effect = subprocess_side_effect
+
+        success, msg, _, _, _ = run_agentic_change("https://github.com/owner/repo/issues/1")
+
+        assert success is True
+        assert real_temp_dir.exists(), "Successful clone should not cleanup temp directory"
diff --git a/tests/test_e2e_issue_418.py b/tests/test_e2e_issue_418.py
index f35f83da..0361b3c6 100644
--- a/tests/test_e2e_issue_418.py
+++ b/tests/test_e2e_issue_418.py
@@ -1,31 +1,4 @@
-"""
-E2E Test for Issue #418: Temp Directory Resource Leak in _setup_repository()
-
-This test verifies the bug at a system level by simulating what happens when
-a user triggers a git clone failure through the agentic_change workflow.
-
-The Bug:
-- _setup_repository() creates a temp directory at line 112: tempfile.mkdtemp()
-- When git clone fails, RuntimeError is raised at lines 129 or 131
-- The temp directory is never cleaned up before the exception is raised
-- Over time, this accumulates orphaned temp directories and wastes disk space
-
-E2E Test Strategy:
-- Use subprocess to run a Python script that:
-  1. Calls _setup_repository() with a repository that will fail to clone
-  2. Captures the created temp directory path
-  3. Verifies the temp directory still exists after the exception
-- This simulates real-world user behavior when clones fail
-- The test should FAIL on buggy code (temp dir exists) and PASS once fixed
-
-User-Facing Impact:
-- Users running pdd commands on invalid/private repositories
-- Network interruptions during git clone
-- Authentication failures
-- Rate limiting issues
-- Disk space gradually filled with orphaned temp directories
-- In extreme cases: "No space left on device" errors
-"""
+"""E2E tests for Issue #418: Temp directory cleanup on git clone failure."""
 
 import glob
 import json
@@ -35,481 +8,81 @@
 import tempfile
 from pathlib import Path
 
-import pytest
-
 
 def get_project_root() -> Path:
-    """Get the project root directory."""
-    current = Path(__file__).parent.parent
-    return current
-
-
-class TestTempDirLeakE2E:
-    """
-    E2E tests for Issue #418: Temp directory resource leak in _setup_repository()
-    when git clone fails.
-    """
+    return Path(__file__).parent.parent
 
-    def test_temp_dir_leaked_on_clone_failure_nonexistent_repo(self, tmp_path: Path):
-        """
-        E2E Test: _setup_repository() leaks temp directory when clone fails.
 
-        This test simulates the real-world scenario:
-        1. User runs a pdd command that calls _setup_repository()
-        2. Git clone fails (e.g., non-existent repo, auth failure, network issue)
-        3. Temp directory should be cleaned up, but due to the bug, it persists
+def test_temp_dir_cleaned_up_on_clone_failure(tmp_path: Path):
+    """E2E: Temp directory cleaned up when clone fails."""
+    project_root = get_project_root()
 
-        Expected behavior (after fix):
-        - Temp directory is deleted before RuntimeError is raised
-        - No pdd_* directories remain in /tmp after the exception
-
-        Bug behavior (Issue #418):
-        - Temp directory remains in /tmp after RuntimeError
-        - Multiple failures accumulate leaked temp directories
-        - Disk space is gradually consumed
-        """
-        project_root = get_project_root()
-
-        # Create a test script that simulates the bug scenario
-        test_script = f'''
-import sys
-import json
-import os
-import glob
-import tempfile
-from pathlib import Path
-
-# Add project root to path
+    test_script = f'''
+import sys, json, glob, tempfile, os
 sys.path.insert(0, "{project_root}")
-
 from pdd.agentic_change import _setup_repository
 
-def count_pdd_temp_dirs():
-    """Count temp directories matching pdd_* pattern."""
-    temp_root = tempfile.gettempdir()
-    pattern = os.path.join(temp_root, "pdd_*")
-    return len(glob.glob(pattern))
-
-if __name__ == "__main__":
-    # Count temp directories before
-    before_count = count_pdd_temp_dirs()
-
-    # Try to setup a repository that will fail to clone
-    # Using a non-existent repository to trigger clone failure
-    temp_dir = None
-    exception_raised = False
+temp_root = tempfile.gettempdir()
+before = len(glob.glob(os.path.join(temp_root, "pdd_*")))
 
-    try:
-        # This should create a temp directory and then fail during clone
-        temp_dir = _setup_repository(
-            owner="nonexistent-user-12345",
-            repo="nonexistent-repo-67890",
-            quiet=True
-        )
-    except RuntimeError as e:
-        # Expected - clone should fail
-        exception_raised = True
-        error_message = str(e)
-
-    # Count temp directories after
-    after_count = count_pdd_temp_dirs()
-
-    # Find any leaked pdd_* directories
-    temp_root = tempfile.gettempdir()
-    leaked_dirs = glob.glob(os.path.join(temp_root, "pdd_nonexistent-repo-*"))
+try:
+    _setup_repository("nonexistent-user", "nonexistent-repo", quiet=True)
+except RuntimeError:
+    pass
 
-    # Output results as JSON
-    result = {{
-        "before_count": before_count,
-        "after_count": after_count,
-        "exception_raised": exception_raised,
-        "leaked_dirs": leaked_dirs,
-        "temp_dirs_created": after_count - before_count,
-        "bug_detected": after_count > before_count
-    }}
-
-    print(json.dumps(result))
+after = len(glob.glob(os.path.join(temp_root, "pdd_*")))
+print(json.dumps({{"leaked": after > before}}))
 '''
 
-        # Write the test script to a temp file
-        script_file = tmp_path / "test_clone_failure_leak.py"
-        script_file.write_text(test_script)
-
-        # Run the test script in a subprocess with proper environment
-        env = os.environ.copy()
-        env['PYTHONPATH'] = str(project_root)
-        env['PDD_FORCE_LOCAL'] = '1'  # Prevent any real API calls if possible
-
-        result = subprocess.run(
-            [sys.executable, str(script_file)],
-            capture_output=True,
-            text=True,
-            cwd=str(tmp_path),
-            env=env,
-            timeout=30
-        )
+    script_file = tmp_path / "test_leak.py"
+    script_file.write_text(test_script)
 
-        # Parse the output
-        if result.returncode != 0:
-            pytest.fail(
-                f"Test script failed to run:\n"
-                f"Return code: {result.returncode}\n"
-                f"STDOUT:\n{result.stdout}\n"
-                f"STDERR:\n{result.stderr}"
-            )
+    env = os.environ.copy()
+    env['PYTHONPATH'] = str(project_root)
 
-        try:
-            test_result = json.loads(result.stdout.strip())
-        except json.JSONDecodeError as e:
-            pytest.fail(
-                f"Failed to parse test output as JSON:\n"
-                f"Error: {e}\n"
-                f"STDOUT:\n{result.stdout}\n"
-                f"STDERR:\n{result.stderr}"
-            )
+    result = subprocess.run(
+        [sys.executable, str(script_file)],
+        capture_output=True, text=True, timeout=30, env=env
+    )
 
-        # THE BUG CHECK: Temp directories should NOT leak after clone failure
-        if test_result['bug_detected']:
-            leaked_dirs = test_result['leaked_dirs']
+    assert result.returncode == 0, f"Script failed: {result.stderr}"
+    output = json.loads(result.stdout.strip())
+    assert not output['leaked'], "Temp directory leaked after clone failure"
 
-            pytest.fail(
-                f"BUG DETECTED (Issue #418): Temp directory leaked after git clone failure!\n\n"
-                f"Location: pdd/agentic_change.py:111-132\n\n"
-                f"Scenario:\n"
-                f"  1. _setup_repository() creates temp directory at line 112:\n"
-                f"     temp_dir = Path(tempfile.mkdtemp(prefix=f'pdd_{{repo}}_'))\n"
-                f"  2. Git clone fails (non-existent repo, auth failure, network issue)\n"
-                f"  3. RuntimeError raised at line 129 or 131 WITHOUT cleanup\n"
-                f"  4. Temp directory never deleted - LEAKED!\n\n"
-                f"Results:\n"
-                f"  - Temp dirs before: {test_result['before_count']}\n"
-                f"  - Temp dirs after: {test_result['after_count']}\n"
-                f"  - Leaked directories: {test_result['temp_dirs_created']}\n"
-                f"  - Leaked paths:\n    " + "\n    ".join(leaked_dirs) + "\n\n"
-                f"Impact:\n"
-                f"  - Users with failed clones leak temp directories\n"
-                f"  - Common scenarios: auth failures, network issues, typos in repo names\n"
-                f"  - Accumulates over time, consuming disk space\n"
-                f"  - Can lead to 'No space left on device' in extreme cases\n\n"
-                f"Root Cause:\n"
-                f"  - temp_dir created at line 112 but never cleaned up\n"
-                f"  - RuntimeError raised at lines 129, 131 without calling shutil.rmtree()\n"
-                f"  - No try-finally block to guarantee cleanup\n\n"
-                f"Fix:\n"
-                f"  Add cleanup before raising exceptions:\n"
-                f"  if result.returncode != 0:\n"
-                f"      shutil.rmtree(temp_dir, ignore_errors=True)  # Cleanup\n"
-                f"      raise RuntimeError(...)\n\n"
-                f"  Or use try-finally:\n"
-                f"  try:\n"
-                f"      result = subprocess.run(...)\n"
-                f"  except Exception as e:\n"
-                f"      shutil.rmtree(temp_dir, ignore_errors=True)  # Cleanup\n"
-                f"      raise\n\n"
-                f"Reference:\n"
-                f"  - pdd/agentic_test.py:161-162 shows correct pattern:\n"
-                f"    shutil.rmtree(temp_dir, ignore_errors=True)"
-            )
 
-    def test_temp_dir_leaked_on_subprocess_exception(self, tmp_path: Path):
-        """
-        E2E Test: _setup_repository() leaks temp dir when subprocess.run() raises exception.
+def test_temp_dir_kept_on_success(tmp_path: Path):
+    """E2E: Successful clone keeps temp directory."""
+    project_root = get_project_root()
 
-        This test verifies that even when subprocess.run() itself raises an exception
-        (not just returning non-zero exit code), the temp directory still leaks.
-
-        User-facing scenarios:
-        - Command not found (gh not installed)
-        - Permission denied errors
-        - System resource exhaustion
-        - Disk space issues during subprocess execution
-        """
-        project_root = get_project_root()
-
-        test_script = f'''
-import sys
-import json
-import os
-import glob
-import tempfile
+    test_script = f'''
+import sys, json, os
 from pathlib import Path
 from unittest.mock import patch, MagicMock
-
 sys.path.insert(0, "{project_root}")
-
 from pdd.agentic_change import _setup_repository
 
-def count_pdd_temp_dirs():
-    """Count temp directories matching pdd_* pattern."""
-    temp_root = tempfile.gettempdir()
-    pattern = os.path.join(temp_root, "pdd_*")
-    return len(glob.glob(pattern))
-
-if __name__ == "__main__":
-    before_count = count_pdd_temp_dirs()
-
-    exception_raised = False
-
-    # Mock subprocess.run to raise an exception
-    with patch('pdd.agentic_change.subprocess.run') as mock_run:
-        mock_run.side_effect = OSError("Command not found: gh")
-
-        try:
-            _setup_repository(
-                owner="test-owner",
-                repo="test-repo",
-                quiet=True
-            )
-        except (RuntimeError, OSError) as e:
-            exception_raised = True
-
-    after_count = count_pdd_temp_dirs()
-
-    # Find leaked directories
-    temp_root = tempfile.gettempdir()
-    leaked_dirs = glob.glob(os.path.join(temp_root, "pdd_test-repo-*"))
-
-    result = {{
-        "before_count": before_count,
-        "after_count": after_count,
-        "exception_raised": exception_raised,
-        "leaked_dirs": leaked_dirs,
-        "temp_dirs_created": after_count - before_count,
-        "bug_detected": after_count > before_count
-    }}
-
-    print(json.dumps(result))
-'''
-
-        script_file = tmp_path / "test_subprocess_exception_leak.py"
-        script_file.write_text(test_script)
-
-        env = os.environ.copy()
-        env['PYTHONPATH'] = str(project_root)
-        env['PDD_FORCE_LOCAL'] = '1'
-
-        result = subprocess.run(
-            [sys.executable, str(script_file)],
-            capture_output=True,
-            text=True,
-            cwd=str(tmp_path),
-            env=env,
-            timeout=30
-        )
-
-        if result.returncode != 0:
-            pytest.fail(f"Test script failed:\n{result.stderr}")
-
-        try:
-            test_result = json.loads(result.stdout.strip())
-        except json.JSONDecodeError:
-            pytest.fail(f"Failed to parse output:\n{result.stdout}")
-
-        if test_result['bug_detected']:
-            pytest.fail(
-                f"BUG DETECTED (Issue #418): Temp directory leaked after subprocess exception!\n\n"
-                f"This confirms the bug affects both failure modes:\n"
-                f"  1. Non-zero exit code (line 129)\n"
-                f"  2. Exception from subprocess.run() (line 131)\n\n"
-                f"Leaked directories: {test_result['temp_dirs_created']}\n"
-                f"Paths: {test_result['leaked_dirs']}\n\n"
-                f"Both exception paths must cleanup temp_dir before raising."
-            )
-
-    def test_temp_dir_cleaned_up_on_successful_clone(self, tmp_path: Path):
-        """
-        E2E Test: Successful clone should NOT clean up temp directory.
-
-        This is a regression test to ensure:
-        1. Normal clone operation works correctly
-        2. Temp directory is returned and used (NOT deleted)
-        3. The fix doesn't break existing functionality
-
-        Note: This test verifies that successful clones continue to work correctly.
-        The temp directory is intentionally kept for successful clones (it's used
-        as the working directory). Only FAILED clones should clean up.
-        """
-        project_root = get_project_root()
-
-        test_script = f'''
-import sys
-import json
-import os
-from pathlib import Path
-from unittest.mock import patch, MagicMock
-
-sys.path.insert(0, "{project_root}")
-
-from pdd.agentic_change import _setup_repository
-
-if __name__ == "__main__":
-    # Mock subprocess.run to simulate successful clone
-    with patch('pdd.agentic_change.subprocess.run') as mock_run:
-        # Simulate successful clone
-        mock_result = MagicMock()
-        mock_result.returncode = 0
-        mock_result.stderr = ""
-        mock_run.return_value = mock_result
-
-        try:
-            result_dir = _setup_repository(
-                owner="test-owner",
-                repo="test-repo",
-                quiet=True
-            )
-
-            # Successful clone should return the temp directory
-            success = True
-            temp_dir_exists = result_dir.exists()
-            temp_dir_path = str(result_dir)
-        except Exception as e:
-            success = False
-            temp_dir_exists = False
-            temp_dir_path = ""
-
-    result = {{
-        "success": success,
-        "temp_dir_exists": temp_dir_exists,
-        "temp_dir_path": temp_dir_path
-    }}
-
-    print(json.dumps(result))
-'''
-
-        script_file = tmp_path / "test_successful_clone.py"
-        script_file.write_text(test_script)
-
-        env = os.environ.copy()
-        env['PYTHONPATH'] = str(project_root)
-        env['PDD_FORCE_LOCAL'] = '1'
-
-        result = subprocess.run(
-            [sys.executable, str(script_file)],
-            capture_output=True,
-            text=True,
-            cwd=str(tmp_path),
-            env=env,
-            timeout=30
-        )
-
-        if result.returncode != 0:
-            pytest.fail(f"Test script failed:\n{result.stderr}")
-
-        try:
-            test_result = json.loads(result.stdout.strip())
-        except json.JSONDecodeError:
-            pytest.fail(f"Failed to parse output:\n{result.stdout}")
-
-        # Successful clone should work normally
-        assert test_result['success'], (
-            f"Successful clone failed! This is a regression.\n"
-            f"The fix should only cleanup on FAILED clones, not successful ones."
-        )
-
-        assert test_result['temp_dir_exists'], (
-            f"Temp directory was deleted after successful clone!\n"
-            f"This is a regression - successful clones should keep the temp directory.\n"
-            f"Path: {test_result['temp_dir_path']}\n\n"
-            f"Only FAILED clones should cleanup temp_dir before raising exception."
-        )
-
-    def test_multiple_clone_failures_accumulate_leaked_dirs(self, tmp_path: Path):
-        """
-        E2E Test: Multiple clone failures accumulate leaked temp directories.
-
-        This test demonstrates the cumulative impact of the bug:
-        1. Each failed clone creates a new leaked temp directory
-        2. Over time, this accumulates and wastes significant disk space
-        3. In production, this can lead to disk space exhaustion
-
-        This is an integration test showing the real-world impact when users
-        experience multiple clone failures (common with typos, auth issues, etc.).
-        """
-        project_root = get_project_root()
-
-        test_script = f'''
-import sys
-import json
-import os
-import glob
-import tempfile
-
-sys.path.insert(0, "{project_root}")
-
-from pdd.agentic_change import _setup_repository
-
-def count_pdd_temp_dirs():
-    """Count temp directories matching pdd_* pattern."""
-    temp_root = tempfile.gettempdir()
-    pattern = os.path.join(temp_root, "pdd_*")
-    return len(glob.glob(pattern))
-
-if __name__ == "__main__":
-    before_count = count_pdd_temp_dirs()
-
-    # Simulate multiple clone failures (e.g., user keeps trying with typos)
-    num_failures = 3
-    failures = []
-
-    for i in range(num_failures):
-        try:
-            _setup_repository(
-                owner="nonexistent-user",
-                repo=f"nonexistent-repo-{{i}}",
-                quiet=True
-            )
-        except RuntimeError:
-            failures.append(i)
-
-    after_count = count_pdd_temp_dirs()
-
-    result = {{
-        "before_count": before_count,
-        "after_count": after_count,
-        "num_failures": len(failures),
-        "leaked_count": after_count - before_count,
-        "bug_detected": (after_count - before_count) >= num_failures
-    }}
+with patch('pdd.agentic_change.subprocess.run') as mock_run:
+    mock_run.return_value = MagicMock(returncode=0, stderr="")
+    try:
+        result_dir = _setup_repository("test-owner", "test-repo", quiet=True)
+        exists = result_dir.exists()
+    except:
+        exists = False
 
-    print(json.dumps(result))
+print(json.dumps({{"exists": exists}}))
 '''
 
-        script_file = tmp_path / "test_accumulation.py"
-        script_file.write_text(test_script)
-
-        env = os.environ.copy()
-        env['PYTHONPATH'] = str(project_root)
-        env['PDD_FORCE_LOCAL'] = '1'
-
-        result = subprocess.run(
-            [sys.executable, str(script_file)],
-            capture_output=True,
-            text=True,
-            cwd=str(tmp_path),
-            env=env,
-            timeout=60
-        )
+    script_file = tmp_path / "test_success.py"
+    script_file.write_text(test_script)
 
-        if result.returncode != 0:
-            pytest.fail(f"Test script failed:\n{result.stderr}")
+    env = os.environ.copy()
+    env['PYTHONPATH'] = str(project_root)
 
-        try:
-            test_result = json.loads(result.stdout.strip())
-        except json.JSONDecodeError:
-            pytest.fail(f"Failed to parse output:\n{result.stdout}")
+    result = subprocess.run(
+        [sys.executable, str(script_file)],
+        capture_output=True, text=True, timeout=30, env=env
+    )
 
-        if test_result['bug_detected']:
-            pytest.fail(
-                f"BUG DETECTED (Issue #418): Multiple failures accumulate leaked directories!\n\n"
-                f"Cumulative Impact:\n"
-                f"  - Number of clone failures: {test_result['num_failures']}\n"
-                f"  - Leaked temp directories: {test_result['leaked_count']}\n"
-                f"  - Before test: {test_result['before_count']} temp dirs\n"
-                f"  - After test: {test_result['after_count']} temp dirs\n\n"
-                f"Real-World Impact:\n"
-                f"  - Users retrying failed commands leak multiple directories\n"
-                f"  - CI/CD systems running many pdd commands accumulate leaks\n"
-                f"  - Over time, disk space is gradually consumed\n"
-                f"  - Can eventually cause 'No space left on device' errors\n\n"
-                f"This demonstrates why proper resource cleanup is critical!"
-            )
+    assert result.returncode == 0, f"Script failed: {result.stderr}"
+    output = json.loads(result.stdout.strip())
+    assert output['exists'], "Successful clone should keep temp directory"

From 837dd8547f7340aa72f9aec1e75cb0cf14f2a063 Mon Sep 17 00:00:00 2001
From: Serhan <serhanasad2013@live.com>
Date: Wed, 28 Jan 2026 16:40:57 -0500
Subject: [PATCH 5/5] Address Copilot review: Log cleanup errors instead of
 silently ignoring

Replace ignore_errors=True with explicit error handling that:
- Attempts cleanup with shutil.rmtree()
- Catches and logs cleanup failures to stderr
- Allows diagnosis of permission issues and other problems
- Doesn't interrupt the main error flow

This addresses the Copilot review comments about silently hiding legitimate
errors during cleanup (e.g., permission issues).
---
 pdd/agentic_change.py | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/pdd/agentic_change.py b/pdd/agentic_change.py
index b45db0b9..e63e8eb7 100644
--- a/pdd/agentic_change.py
+++ b/pdd/agentic_change.py
@@ -4,6 +4,7 @@
 import re
 import shutil
 import subprocess
+import sys
 import tempfile
 from pathlib import Path
 from typing import List, Tuple, Optional, Any
@@ -125,10 +126,16 @@ def _setup_repository(owner: str, repo: str, quiet: bool) -> Path:
             check=False
         )
         if result.returncode != 0:
-            shutil.rmtree(temp_dir, ignore_errors=True)  # Cleanup on failure
+            try:
+                shutil.rmtree(temp_dir)
+            except Exception as cleanup_error:
+                print(f"Warning: Failed to cleanup temp directory {temp_dir}: {cleanup_error}", file=sys.stderr)
             raise RuntimeError(f"Failed to clone repository: {result.stderr.strip()}")
     except Exception as e:
-        shutil.rmtree(temp_dir, ignore_errors=True)  # Cleanup on exception
+        try:
+            shutil.rmtree(temp_dir)
+        except Exception as cleanup_error:
+            print(f"Warning: Failed to cleanup temp directory {temp_dir}: {cleanup_error}", file=sys.stderr)
         raise RuntimeError(f"Failed to execute clone command: {e}")
 
     return temp_dir