diff --git a/CLAUDE.md b/CLAUDE.md index e06e1e6..f179406 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -324,9 +324,10 @@ devs start eamonn --env DEBUG=false --env NEW_VAR=test **Container Pool**: - `CONTAINER_POOL`: Comma-separated container names for Claude tasks (default: eamonn,harry,darren) - `CI_CONTAINER_POOL`: Optional comma-separated container names for CI/test tasks only. If not specified, CI tasks use the main `CONTAINER_POOL`. If specified, the main `CONTAINER_POOL` is used only for Claude tasks, and this pool is used exclusively for tests. The pools can overlap (share container names) if desired. -- `CONTAINER_TIMEOUT_MINUTES`: Idle timeout for containers in minutes (default: 60) -- `CONTAINER_MAX_AGE_HOURS`: Maximum container age in hours - containers older than this are cleaned up when idle (default: 10) -- `CLEANUP_CHECK_INTERVAL_SECONDS`: How often to check for idle/old containers (default: 60) +- `STOP_CONTAINER_AFTER_TASK`: Stop container after each task completes (default: true). This ensures only one running container per dev name at any time, reducing RAM usage when multiple repos are in play. +- `CONTAINER_TIMEOUT_MINUTES`: Idle timeout for containers in minutes (default: 60). Only applies when `STOP_CONTAINER_AFTER_TASK` is false. +- `CONTAINER_MAX_AGE_HOURS`: Maximum container age in hours - containers older than this are cleaned up when idle (default: 10). Only applies when `STOP_CONTAINER_AFTER_TASK` is false. +- `CLEANUP_CHECK_INTERVAL_SECONDS`: How often to check for idle/old containers (default: 60). Only applies when `STOP_CONTAINER_AFTER_TASK` is false. - `MAX_CONCURRENT_TASKS`: Maximum parallel tasks (default: 3) **Access Control**: @@ -487,14 +488,22 @@ devs-webhook-worker --container-name eamonn --task-json-stdin < task.json ### Container Lifecycle Management -Containers are automatically managed with cleanup based on idle time and age: +By default, containers are stopped immediately after each task completes (`STOP_CONTAINER_AFTER_TASK=true`). This ensures only one running container per dev name (queue) at any time, significantly reducing RAM usage when multiple repositories are in play. -1. **Idle Cleanup**: Containers idle for longer than `CONTAINER_TIMEOUT_MINUTES` (default: 60 min) are stopped and cleaned up -2. **Age-Based Cleanup**: Containers older than `CONTAINER_MAX_AGE_HOURS` (default: 10 hours) are cleaned up when they become idle +**Default Behavior (stop after task)**: +- Container is started when a task begins +- Container is stopped immediately after the task completes; the container and its workspace are kept so the next run can start quickly +- Next task on the same queue restarts the stopped container (or creates a new one if needed) +- Only one running container per dev name at any time + +**Legacy Behavior** (`STOP_CONTAINER_AFTER_TASK=false`): +Containers remain running and are cleaned up based on idle time and age: +1. **Idle Cleanup**: Containers idle for longer than `CONTAINER_TIMEOUT_MINUTES` (default: 60 min) are stopped +2. **Age-Based Cleanup**: Containers older than `CONTAINER_MAX_AGE_HOURS` (default: 10 hours) are cleaned up when idle 3. **Graceful Shutdown**: On server shutdown (SIGTERM/SIGINT), all running containers are cleaned up 4. 
**Manual Stop**: Admin can force-stop containers via `POST /container/{name}/stop` endpoint -**Key behaviors**: +**Legacy key behaviors** (only when `STOP_CONTAINER_AFTER_TASK=false`): - Containers currently processing tasks are never interrupted by age-based cleanup - Age-based cleanup only triggers when a container is idle (not actively processing) - The cleanup check runs every `CLEANUP_CHECK_INTERVAL_SECONDS` (default: 60 seconds) @@ -504,6 +513,7 @@ Containers are automatically managed with cleanup based on idle time and age: - `last_used`: Last task completion time - `age_hours`: How long container has been running - `idle_minutes`: How long since last task completed +- `stop_container_after_task`: Whether containers are stopped after each task **Burst Mode Considerations**: In SQS burst mode (`--burst`), the background cleanup worker is disabled since: diff --git a/packages/common/devs_common/core/container.py b/packages/common/devs_common/core/container.py index 583169d..1d0c0f0 100644 --- a/packages/common/devs_common/core/container.py +++ b/packages/common/devs_common/core/container.py @@ -382,47 +382,53 @@ def ensure_container_running( raise ContainerError(f"Failed to ensure container running for {dev_name}: {e}") - def stop_container(self, dev_name: str) -> bool: - """Stop and remove a container by labels (more reliable than names). - + def stop_container(self, dev_name: str, remove: bool = True) -> bool: + """Stop a container by labels, optionally removing it. + Args: dev_name: Development environment name - + remove: If True (default), also remove the container after stopping. + If False, only stop the container (it can be restarted later). + Returns: - True if container was stopped/removed + True if container was stopped (and removed if requested) """ project_labels = self._get_project_labels(dev_name) - + try: console.print(f" ๐Ÿ” Looking for containers with labels: {project_labels}") existing_containers = self.docker.find_containers_by_labels(project_labels) console.print(f" ๐Ÿ“‹ Found {len(existing_containers)} containers") - + if existing_containers: for container_info in existing_containers: container_name = container_info['name'] container_status = container_info['status'] - + console.print(f" ๐Ÿ›‘ Stopping container: {container_name} (status: {container_status})") try: stop_result = self.docker.stop_container(container_name) console.print(f" ๐Ÿ“‹ Stop result: {stop_result}") except DockerError as stop_e: console.print(f" โš ๏ธ Stop failed for {container_name}: {stop_e}") - - console.print(f" ๐Ÿ—‘๏ธ Removing container: {container_name}") - try: - remove_result = self.docker.remove_container(container_name) - console.print(f" ๐Ÿ“‹ Remove result: {remove_result}") - except DockerError as remove_e: - console.print(f" โš ๏ธ Remove failed for {container_name}: {remove_e}") - - console.print(f" โœ… Stopped and removed: {dev_name}") + + if remove: + console.print(f" ๐Ÿ—‘๏ธ Removing container: {container_name}") + try: + remove_result = self.docker.remove_container(container_name) + console.print(f" ๐Ÿ“‹ Remove result: {remove_result}") + except DockerError as remove_e: + console.print(f" โš ๏ธ Remove failed for {container_name}: {remove_e}") + + if remove: + console.print(f" โœ… Stopped and removed: {dev_name}") + else: + console.print(f" โœ… Stopped: {dev_name}") return True else: console.print(f" โš ๏ธ No containers found for {dev_name}") return False - + except DockerError as e: console.print(f" โŒ Error stopping {dev_name}: {e}") return False diff --git 
a/packages/common/devs_common/core/workspace.py b/packages/common/devs_common/core/workspace.py index f1fbd8a..c92e81f 100644 --- a/packages/common/devs_common/core/workspace.py +++ b/packages/common/devs_common/core/workspace.py @@ -13,7 +13,7 @@ safe_remove_directory, is_directory_empty ) -from ..utils.git_utils import get_tracked_files, is_devcontainer_gitignored +from ..utils.git_utils import get_tracked_files, is_devcontainer_gitignored, reset_git_state from ..utils.devcontainer_template import get_template_dir from ..utils.console import get_console from .project import Project @@ -417,23 +417,29 @@ def cleanup_unused_workspaces_all_projects(self, docker_client) -> int: console.print(f"โŒ Error during cross-project workspace cleanup: {e}") return 0 - def sync_workspace(self, dev_name: str, files_to_sync: Optional[List[str]] = None) -> bool: + def sync_workspace(self, dev_name: str, files_to_sync: Optional[List[str]] = None, clean_untracked: bool = True) -> bool: """Sync specific files from project to workspace. - + Args: dev_name: Development environment name files_to_sync: List of files to sync, or None for git-tracked files - + clean_untracked: If True (default), remove untracked files/dirs from workspace + before syncing. Important when reusing workspaces between tasks. + Returns: True if sync was successful """ workspace_dir = self.get_workspace_dir(dev_name) - + if not self.workspace_exists(dev_name): console.print(f" โŒ Workspace for {dev_name} does not exist") return False - + try: + # Clean up workspace git state before syncing (important for reused workspaces) + if clean_untracked and self.project.info.is_git_repo: + self._reset_workspace_git_state(workspace_dir) + if files_to_sync is None: # Sync git-tracked files if self.project.info.is_git_repo: @@ -456,10 +462,28 @@ def sync_workspace(self, dev_name: str, files_to_sync: Optional[List[str]] = Non file_list=file_paths, preserve_permissions=True ) - + console.print(f" โœ… Synced workspace for {dev_name}") return True - + except Exception as e: console.print(f" โŒ Failed to sync workspace for {dev_name}: {e}") - return False \ No newline at end of file + return False + + def _reset_workspace_git_state(self, workspace_dir: Path) -> None: + """Reset workspace git state to clean state. + + Removes untracked files and resets any uncommitted changes. + Important for reusing workspaces between tasks. + + Args: + workspace_dir: Path to workspace directory + """ + git_dir = workspace_dir / ".git" + if not git_dir.exists(): + return + + if reset_git_state(workspace_dir): + console.print(f" ๐Ÿงน Reset workspace git state") + else: + console.print(f" โš ๏ธ Could not reset workspace git state") \ No newline at end of file diff --git a/packages/common/devs_common/utils/git_utils.py b/packages/common/devs_common/utils/git_utils.py index 1f4896b..4069324 100644 --- a/packages/common/devs_common/utils/git_utils.py +++ b/packages/common/devs_common/utils/git_utils.py @@ -77,6 +77,42 @@ def get_git_root(directory: Path) -> Optional[Path]: return None +def reset_git_state(repo_dir: Path, checkout_branch: Optional[str] = None) -> bool: + """Reset git repository to clean state. + + Discards uncommitted changes and removes untracked files. + Optionally checks out a specific branch. 
+ + Args: + repo_dir: Repository directory path + checkout_branch: Optional branch to checkout (with -f to discard changes) + + Returns: + True if reset was successful, False otherwise + """ + try: + repo = Repo(repo_dir) + + if checkout_branch: + # Force checkout branch (discards local modifications) + try: + repo.git.checkout('-f', checkout_branch) + except GitCommandError: + # Branch might not exist, that's OK + return False + else: + # Reset to HEAD (discard uncommitted changes to tracked files) + repo.git.reset('--hard', 'HEAD') + + # Remove untracked files and directories (but not ignored files) + repo.git.clean('-fd') + + return True + + except (InvalidGitRepositoryError, GitCommandError) as e: + return False + + def is_devcontainer_gitignored(repo_dir: Path) -> bool: """Check if .devcontainer/ folder is gitignored in the repository. diff --git a/packages/webhook/devs_webhook/config.py b/packages/webhook/devs_webhook/config.py index 6ca7d4c..448e3f9 100644 --- a/packages/webhook/devs_webhook/config.py +++ b/packages/webhook/devs_webhook/config.py @@ -84,6 +84,12 @@ def __init__(self, **kwargs): default=60, description="How often to check for idle/old containers (in seconds)" ) + stop_container_after_task: bool = Field( + default=True, + description="Stop container after each task completes. " + "This ensures only one running container per dev name at any time, " + "reducing RAM usage when multiple repos are in play." + ) max_concurrent_tasks: int = Field(default=3, description="Maximum concurrent tasks") # Repository settings diff --git a/packages/webhook/devs_webhook/core/container_pool.py b/packages/webhook/devs_webhook/core/container_pool.py index 254d1b6..4e98eea 100644 --- a/packages/webhook/devs_webhook/core/container_pool.py +++ b/packages/webhook/devs_webhook/core/container_pool.py @@ -837,10 +837,35 @@ async def _process_task_subprocess(self, dev_name: str, queued_task: QueuedTask) # Task execution failed, but we've logged it - don't re-raise finally: - # Update last_used timestamp after task completes (success or failure) + # Handle container cleanup after task completes (success or failure) async with self._lock: if dev_name in self.running_containers: - self.running_containers[dev_name]["last_used"] = datetime.now(tz=timezone.utc) + if self.config.stop_container_after_task: + # Stop container immediately after task completes + # This ensures only one running container per dev name queue + # Don't remove the container or workspace - just stop it so it can restart quickly + info = self.running_containers[dev_name] + logger.info("Stopping container after task completion", + container=dev_name, + repo_path=str(info["repo_path"]), + stop_container_after_task=True) + try: + await self._cleanup_container( + dev_name, + info["repo_path"], + remove_workspace=False, + remove_container=False + ) + del self.running_containers[dev_name] + logger.info("Container stopped successfully after task", + container=dev_name) + except Exception as cleanup_error: + logger.error("Failed to stop container after task", + container=dev_name, + error=str(cleanup_error)) + else: + # Just update last_used timestamp (legacy behavior) + self.running_containers[dev_name]["last_used"] = datetime.now(tz=timezone.utc) async def _checkout_default_branch(self, repo_name: str, repo_path: Path) -> None: """Checkout the default branch to ensure devcontainer files are from the right branch. 
@@ -876,6 +901,8 @@ async def _checkout_default_branch(self, repo_name: str, repo_path: Path) -> Non logger.info("Checked out default branch", repo=repo_name, branch=default_branch) + # Clean untracked files after checkout + await self._clean_untracked_files(repo_path) elif default_branch == "dev": # dev branch doesn't exist, try main logger.info("Branch 'dev' not found, trying 'main'", @@ -891,6 +918,8 @@ async def _checkout_default_branch(self, repo_name: str, repo_path: Path) -> Non if process.returncode == 0: logger.info("Checked out main branch", repo=repo_name) + # Clean untracked files after checkout + await self._clean_untracked_files(repo_path) else: # Both failed, stay on current branch (probably master or main after clone) logger.warning("Could not checkout dev or main branch, staying on current branch", @@ -903,6 +932,31 @@ async def _checkout_default_branch(self, repo_name: str, repo_path: Path) -> Non branch=default_branch, stderr=stderr.decode()[:200] if stderr else "") + async def _clean_untracked_files(self, repo_path: Path) -> None: + """Remove untracked files and directories from repository. + + Important for reusing repocache between tasks to avoid leftover files + from previous runs affecting the next task. + + Args: + repo_path: Path to the repository + """ + clean_cmd = ["git", "-C", str(repo_path), "clean", "-fd"] + process = await asyncio.create_subprocess_exec( + *clean_cmd, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE + ) + stdout, stderr = await process.communicate() + + if process.returncode == 0: + logger.info("Cleaned untracked files from repository", + repo_path=str(repo_path)) + else: + logger.warning("Could not clean untracked files", + repo_path=str(repo_path), + stderr=stderr.decode()[:200] if stderr else "") + async def _ensure_repository_cloned( self, repo_name: str, @@ -1211,6 +1265,7 @@ async def get_status(self) -> Dict[str, Any]: "idle_timeout_minutes": self.config.container_timeout_minutes, "max_age_hours": self.config.container_max_age_hours, "check_interval_seconds": self.config.cleanup_check_interval_seconds, + "stop_container_after_task": self.config.stop_container_after_task, }, } @@ -1367,36 +1422,54 @@ async def _idle_cleanup_worker(self) -> None: logger.error("Error in idle cleanup worker", error=str(e)) - async def _cleanup_container(self, dev_name: str, repo_path: Path) -> None: + async def _cleanup_container( + self, + dev_name: str, + repo_path: Path, + remove_workspace: bool = True, + remove_container: bool = True + ) -> None: """Clean up a container after use. - + Args: dev_name: Name of container to clean up repo_path: Path to repository on host + remove_workspace: If True (default), also remove the workspace. + If False, only stop the container but keep the workspace + for faster reuse on restart. + remove_container: If True (default), remove the container after stopping. + If False, only stop the container (it can be restarted quickly). 
""" try: # Create project and managers for cleanup project = Project(repo_path) - + # Use the same config as the rest of the webhook handler workspace_manager = WorkspaceManager(project, self.config) container_manager = ContainerManager(project, self.config) - - # Stop container - logger.info("Starting container stop", container=dev_name) - stop_success = container_manager.stop_container(dev_name) - logger.info("Container stop result", container=dev_name, success=stop_success) - - # Remove workspace - logger.info("Starting workspace removal", container=dev_name) - workspace_success = workspace_manager.remove_workspace(dev_name) - logger.info("Workspace removal result", container=dev_name, success=workspace_success) - - logger.info("Container cleanup complete", + + # Stop container (and optionally remove it) + logger.info("Starting container stop", container=dev_name, remove=remove_container) + stop_success = container_manager.stop_container(dev_name, remove=remove_container) + logger.info("Container stop result", container=dev_name, success=stop_success, removed=remove_container) + + # Remove workspace only if requested + workspace_success = True + if remove_workspace: + logger.info("Starting workspace removal", container=dev_name) + workspace_success = workspace_manager.remove_workspace(dev_name) + logger.info("Workspace removal result", container=dev_name, success=workspace_success) + else: + logger.info("Keeping workspace for faster reuse", + container=dev_name, + workspace_path=str(workspace_manager.get_workspace_dir(dev_name))) + + logger.info("Container cleanup complete", container=dev_name, container_stopped=stop_success, - workspace_removed=workspace_success) - + container_removed=remove_container, + workspace_removed=workspace_success if remove_workspace else "skipped") + except Exception as e: logger.error("Container cleanup failed", container=dev_name, diff --git a/packages/webhook/tests/test_cleanup_mode.py b/packages/webhook/tests/test_cleanup_mode.py new file mode 100644 index 0000000..ffb932e --- /dev/null +++ b/packages/webhook/tests/test_cleanup_mode.py @@ -0,0 +1,165 @@ +"""Tests for container cleanup behavior. + +The cleanup behavior supports two modes: +- remove_workspace=True (default): Full cleanup - stops container AND removes workspace +- remove_workspace=False: Stop-only cleanup - stops container but keeps workspace for faster reuse + +The stop-only mode is useful for the stop-after-task workflow where containers are +stopped after each task but need to restart quickly for the next task on the same repository. 
+""" + +import asyncio +import pytest +import tempfile +from datetime import datetime, timezone +from pathlib import Path +from unittest.mock import MagicMock, patch, AsyncMock + +from devs_webhook.core.container_pool import ContainerPool + + +@pytest.fixture +def mock_config(): + """Create a mock configuration for cleanup tests.""" + config = MagicMock() + config.get_container_pool_list.return_value = ["eamonn", "harry"] + config.get_ci_container_pool_list.return_value = ["eamonn", "harry"] + config.has_separate_ci_pool.return_value = False + config.github_token = "test-token-1234567890" + config.container_timeout_minutes = 60 + config.container_max_age_hours = 10 + config.cleanup_check_interval_seconds = 60 + config.worker_logs_enabled = False + temp_dir = tempfile.mkdtemp() + config.repo_cache_dir = Path(temp_dir) + return config + + +class TestCleanupContainerPool: + """Tests for cleanup behavior in ContainerPool.""" + + @pytest.mark.asyncio + async def test_cleanup_default_removes_workspace(self, mock_config): + """Test that _cleanup_container by default stops container AND removes workspace.""" + with patch('devs_webhook.core.container_pool.get_config', return_value=mock_config): + pool = ContainerPool() + + # Cancel workers to avoid background task issues + for worker in pool.container_workers.values(): + worker.cancel() + if pool.cleanup_worker: + pool.cleanup_worker.cancel() + + # Mock the managers + mock_container_manager = MagicMock() + mock_container_manager.stop_container.return_value = True + mock_workspace_manager = MagicMock() + mock_workspace_manager.remove_workspace.return_value = True + + with patch('devs_webhook.core.container_pool.ContainerManager', return_value=mock_container_manager), \ + patch('devs_webhook.core.container_pool.WorkspaceManager', return_value=mock_workspace_manager), \ + patch('devs_webhook.core.container_pool.Project'): + + await pool._cleanup_container("eamonn", Path("/tmp/test-repo")) + + # Verify container was stopped AND removed (default: remove=True) + mock_container_manager.stop_container.assert_called_once_with("eamonn", remove=True) + # Verify workspace WAS removed (default behavior) + mock_workspace_manager.remove_workspace.assert_called_once_with("eamonn") + + @pytest.mark.asyncio + async def test_cleanup_stop_only_keeps_workspace(self, mock_config): + """Test that _cleanup_container with remove_workspace=False and remove_container=False keeps both.""" + with patch('devs_webhook.core.container_pool.get_config', return_value=mock_config): + pool = ContainerPool() + + # Cancel workers to avoid background task issues + for worker in pool.container_workers.values(): + worker.cancel() + if pool.cleanup_worker: + pool.cleanup_worker.cancel() + + # Mock the managers + mock_container_manager = MagicMock() + mock_container_manager.stop_container.return_value = True + mock_workspace_manager = MagicMock() + mock_workspace_manager.get_workspace_dir.return_value = Path("/tmp/workspace/eamonn") + + with patch('devs_webhook.core.container_pool.ContainerManager', return_value=mock_container_manager), \ + patch('devs_webhook.core.container_pool.WorkspaceManager', return_value=mock_workspace_manager), \ + patch('devs_webhook.core.container_pool.Project'): + + await pool._cleanup_container("eamonn", Path("/tmp/test-repo"), remove_workspace=False, remove_container=False) + + # Verify container was stopped but NOT removed (remove=False) + mock_container_manager.stop_container.assert_called_once_with("eamonn", remove=False) + # Verify workspace was NOT removed 
(kept for reuse) + mock_workspace_manager.remove_workspace.assert_not_called() + # Verify get_workspace_dir was called for logging + mock_workspace_manager.get_workspace_dir.assert_called_once_with("eamonn") + + @pytest.mark.asyncio + async def test_cleanup_preserves_workspace_for_reuse(self, mock_config): + """Test that workspace and container are preserved across multiple cleanups with stop-only mode.""" + with patch('devs_webhook.core.container_pool.get_config', return_value=mock_config): + pool = ContainerPool() + + # Cancel workers + for worker in pool.container_workers.values(): + worker.cancel() + if pool.cleanup_worker: + pool.cleanup_worker.cancel() + + dev_name = "eamonn" + repo_path = Path("/tmp/test-repo") + + # Mock the managers + mock_container_manager = MagicMock() + mock_container_manager.stop_container.return_value = True + mock_workspace_manager = MagicMock() + mock_workspace_manager.get_workspace_dir.return_value = Path("/tmp/workspace/eamonn") + + with patch('devs_webhook.core.container_pool.ContainerManager', return_value=mock_container_manager), \ + patch('devs_webhook.core.container_pool.WorkspaceManager', return_value=mock_workspace_manager), \ + patch('devs_webhook.core.container_pool.Project'): + + # First cleanup with stop-only (don't remove container or workspace) + await pool._cleanup_container(dev_name, repo_path, remove_workspace=False, remove_container=False) + + # Verify container stopped (but not removed) and workspace kept + mock_container_manager.stop_container.assert_called_with(dev_name, remove=False) + assert mock_container_manager.stop_container.call_count == 1 + assert mock_workspace_manager.remove_workspace.call_count == 0 + + # Reset mocks for second cleanup + mock_container_manager.reset_mock() + mock_workspace_manager.reset_mock() + + # Second cleanup (simulating another task on same container) + await pool._cleanup_container(dev_name, repo_path, remove_workspace=False, remove_container=False) + + # Again, container stopped (not removed) and workspace still kept + mock_container_manager.stop_container.assert_called_with(dev_name, remove=False) + assert mock_container_manager.stop_container.call_count == 1 + assert mock_workspace_manager.remove_workspace.call_count == 0 + + @pytest.mark.asyncio + async def test_status_includes_cleanup_settings(self, mock_config): + """Test that status endpoint includes cleanup settings.""" + with patch('devs_webhook.core.container_pool.get_config', return_value=mock_config): + pool = ContainerPool() + + # Cancel workers to avoid background task issues + for worker in pool.container_workers.values(): + worker.cancel() + if pool.cleanup_worker: + pool.cleanup_worker.cancel() + + status = await pool.get_status() + + assert "cleanup_settings" in status + assert "idle_timeout_minutes" in status["cleanup_settings"] + assert "max_age_hours" in status["cleanup_settings"] + assert "check_interval_seconds" in status["cleanup_settings"] + assert status["cleanup_settings"]["idle_timeout_minutes"] == 60 + assert status["cleanup_settings"]["max_age_hours"] == 10 diff --git a/packages/webhook/tests/test_stop_container_after_task.py b/packages/webhook/tests/test_stop_container_after_task.py new file mode 100644 index 0000000..12c8737 --- /dev/null +++ b/packages/webhook/tests/test_stop_container_after_task.py @@ -0,0 +1,348 @@ +"""Tests for stop_container_after_task functionality.""" + +import asyncio +import pytest +import tempfile +from datetime import datetime, timezone +from pathlib import Path +from unittest.mock import 
MagicMock, patch, AsyncMock + +from devs_webhook.core.container_pool import ContainerPool, QueuedTask +from devs_webhook.github.models import ( + WebhookEvent, GitHubRepository, GitHubUser, IssueEvent, GitHubIssue +) +from devs_common.devs_config import DevsOptions + + +@pytest.fixture +def mock_config_stop_after_task(): + """Create a mock configuration with stop_container_after_task enabled.""" + config = MagicMock() + config.get_container_pool_list.return_value = ["eamonn", "harry"] + config.get_ci_container_pool_list.return_value = ["eamonn", "harry"] + config.has_separate_ci_pool.return_value = False + config.github_token = "test-token-1234567890" + config.container_timeout_minutes = 60 + config.container_max_age_hours = 10 + config.cleanup_check_interval_seconds = 60 + config.stop_container_after_task = True # Enabled + config.worker_logs_enabled = False + temp_dir = tempfile.mkdtemp() + config.repo_cache_dir = Path(temp_dir) + return config + + +@pytest.fixture +def mock_config_no_stop_after_task(): + """Create a mock configuration with stop_container_after_task disabled.""" + config = MagicMock() + config.get_container_pool_list.return_value = ["eamonn", "harry"] + config.get_ci_container_pool_list.return_value = ["eamonn", "harry"] + config.has_separate_ci_pool.return_value = False + config.github_token = "test-token-1234567890" + config.container_timeout_minutes = 60 + config.container_max_age_hours = 10 + config.cleanup_check_interval_seconds = 60 + config.stop_container_after_task = False # Disabled (legacy behavior) + config.worker_logs_enabled = False + temp_dir = tempfile.mkdtemp() + config.repo_cache_dir = Path(temp_dir) + return config + + +@pytest.fixture +def mock_event(): + """Create a mock webhook event.""" + return IssueEvent( + action="opened", + repository=GitHubRepository( + id=1, + name="test-repo", + full_name="test-org/test-repo", + owner=GitHubUser( + login="test-org", + id=1, + avatar_url="https://example.com/avatar", + html_url="https://example.com/user" + ), + html_url="https://github.com/test-org/test-repo", + clone_url="https://github.com/test-org/test-repo.git", + ssh_url="git@github.com:test-org/test-repo.git", + default_branch="main" + ), + sender=GitHubUser( + login="sender", + id=2, + avatar_url="https://example.com/avatar2", + html_url="https://example.com/user2" + ), + issue=GitHubIssue( + id=1, + number=42, + title="Test Issue", + body="Test body", + state="open", + user=GitHubUser( + login="sender", + id=2, + avatar_url="https://example.com/avatar2", + html_url="https://example.com/user2" + ), + html_url="https://github.com/test-org/test-repo/issues/42", + created_at="2024-01-01T00:00:00Z", + updated_at="2024-01-01T00:00:00Z" + ) + ) + + +@pytest.mark.asyncio +async def test_status_includes_stop_container_after_task(mock_config_stop_after_task): + """Test that status endpoint includes stop_container_after_task setting.""" + with patch('devs_webhook.core.container_pool.get_config', return_value=mock_config_stop_after_task): + pool = ContainerPool() + + # Cancel workers + for worker in pool.container_workers.values(): + worker.cancel() + if pool.cleanup_worker: + pool.cleanup_worker.cancel() + + status = await pool.get_status() + + assert "cleanup_settings" in status + assert "stop_container_after_task" in status["cleanup_settings"] + assert status["cleanup_settings"]["stop_container_after_task"] is True + + +@pytest.mark.asyncio +async def test_status_includes_stop_container_after_task_disabled(mock_config_no_stop_after_task): + """Test that status 
shows stop_container_after_task as false when disabled.""" + with patch('devs_webhook.core.container_pool.get_config', return_value=mock_config_no_stop_after_task): + pool = ContainerPool() + + # Cancel workers + for worker in pool.container_workers.values(): + worker.cancel() + if pool.cleanup_worker: + pool.cleanup_worker.cancel() + + status = await pool.get_status() + + assert status["cleanup_settings"]["stop_container_after_task"] is False + + +@pytest.mark.asyncio +async def test_container_stopped_after_task_when_enabled(mock_config_stop_after_task): + """Test that container is stopped after task when stop_container_after_task is enabled.""" + with patch('devs_webhook.core.container_pool.get_config', return_value=mock_config_stop_after_task): + pool = ContainerPool() + + # Cancel workers to prevent actual task processing + for worker in pool.container_workers.values(): + worker.cancel() + if pool.cleanup_worker: + pool.cleanup_worker.cancel() + + # Simulate a running container being tracked + dev_name = "eamonn" + repo_path = Path("/tmp/test-repo") + now = datetime.now(tz=timezone.utc) + pool.running_containers[dev_name] = { + "repo_path": repo_path, + "started_at": now, + "last_used": now, + } + + # Mock the _cleanup_container method + pool._cleanup_container = AsyncMock() + + # Simulate the finally block logic from _process_task_subprocess + async with pool._lock: + if dev_name in pool.running_containers: + if pool.config.stop_container_after_task: + info = pool.running_containers[dev_name] + # When stop_container_after_task=True, we stop but don't remove + # container or workspace for faster restart + await pool._cleanup_container( + dev_name, + info["repo_path"], + remove_workspace=False, + remove_container=False + ) + del pool.running_containers[dev_name] + + # Verify cleanup was called with stop-only parameters + pool._cleanup_container.assert_called_once_with( + dev_name, repo_path, remove_workspace=False, remove_container=False + ) + # Verify container was removed from tracking + assert dev_name not in pool.running_containers + + +@pytest.mark.asyncio +async def test_container_not_stopped_when_disabled(mock_config_no_stop_after_task): + """Test that container is NOT stopped after task when stop_container_after_task is disabled.""" + with patch('devs_webhook.core.container_pool.get_config', return_value=mock_config_no_stop_after_task): + pool = ContainerPool() + + # Cancel workers + for worker in pool.container_workers.values(): + worker.cancel() + if pool.cleanup_worker: + pool.cleanup_worker.cancel() + + # Simulate a running container being tracked + dev_name = "eamonn" + repo_path = Path("/tmp/test-repo") + initial_time = datetime.now(tz=timezone.utc) + pool.running_containers[dev_name] = { + "repo_path": repo_path, + "started_at": initial_time, + "last_used": initial_time, + } + + # Mock the _cleanup_container method + pool._cleanup_container = AsyncMock() + + # Simulate the finally block logic from _process_task_subprocess + async with pool._lock: + if dev_name in pool.running_containers: + if pool.config.stop_container_after_task: + info = pool.running_containers[dev_name] + await pool._cleanup_container(dev_name, info["repo_path"]) + del pool.running_containers[dev_name] + else: + # Just update last_used timestamp (legacy behavior) + pool.running_containers[dev_name]["last_used"] = datetime.now(tz=timezone.utc) + + # Verify cleanup was NOT called + pool._cleanup_container.assert_not_called() + # Verify container is still being tracked + assert dev_name in 
pool.running_containers + # Verify last_used was updated + assert pool.running_containers[dev_name]["last_used"] > initial_time + + +@pytest.mark.asyncio +async def test_only_one_running_container_per_dev_name(mock_config_stop_after_task): + """Test that stop_container_after_task ensures only one container per dev name. + + When enabled, after each task completes the container is stopped, so the next + task in the same queue will start a fresh container. + """ + with patch('devs_webhook.core.container_pool.get_config', return_value=mock_config_stop_after_task): + pool = ContainerPool() + + # Cancel workers + for worker in pool.container_workers.values(): + worker.cancel() + if pool.cleanup_worker: + pool.cleanup_worker.cancel() + + dev_name = "eamonn" + repo_path_1 = Path("/tmp/test-repo-1") + repo_path_2 = Path("/tmp/test-repo-2") + + # Mock cleanup + pool._cleanup_container = AsyncMock() + + # Simulate first task starting + now = datetime.now(tz=timezone.utc) + pool.running_containers[dev_name] = { + "repo_path": repo_path_1, + "started_at": now, + "last_used": now, + } + + # Simulate first task completing (with stop after task) + async with pool._lock: + if dev_name in pool.running_containers and pool.config.stop_container_after_task: + await pool._cleanup_container( + dev_name, + pool.running_containers[dev_name]["repo_path"], + remove_workspace=False, + remove_container=False + ) + del pool.running_containers[dev_name] + + # Container should be stopped + assert dev_name not in pool.running_containers + pool._cleanup_container.assert_called_once_with( + dev_name, repo_path_1, remove_workspace=False, remove_container=False + ) + + # Reset mock + pool._cleanup_container.reset_mock() + + # Simulate second task starting (different repo) + now = datetime.now(tz=timezone.utc) + pool.running_containers[dev_name] = { + "repo_path": repo_path_2, + "started_at": now, + "last_used": now, + } + + # Simulate second task completing + async with pool._lock: + if dev_name in pool.running_containers and pool.config.stop_container_after_task: + await pool._cleanup_container( + dev_name, + pool.running_containers[dev_name]["repo_path"], + remove_workspace=False, + remove_container=False + ) + del pool.running_containers[dev_name] + + # Container should be stopped again + assert dev_name not in pool.running_containers + pool._cleanup_container.assert_called_once_with( + dev_name, repo_path_2, remove_workspace=False, remove_container=False + ) + + +@pytest.mark.asyncio +async def test_cleanup_error_handling(mock_config_stop_after_task): + """Test that cleanup errors are handled gracefully and don't crash the pool.""" + with patch('devs_webhook.core.container_pool.get_config', return_value=mock_config_stop_after_task): + pool = ContainerPool() + + # Cancel workers + for worker in pool.container_workers.values(): + worker.cancel() + if pool.cleanup_worker: + pool.cleanup_worker.cancel() + + dev_name = "eamonn" + repo_path = Path("/tmp/test-repo") + now = datetime.now(tz=timezone.utc) + pool.running_containers[dev_name] = { + "repo_path": repo_path, + "started_at": now, + "last_used": now, + } + + # Mock cleanup to raise an exception + pool._cleanup_container = AsyncMock(side_effect=Exception("Cleanup failed")) + + # Simulate the finally block - should not raise + try: + async with pool._lock: + if dev_name in pool.running_containers: + if pool.config.stop_container_after_task: + info = pool.running_containers[dev_name] + try: + await pool._cleanup_container( + dev_name, + info["repo_path"], + 
remove_workspace=False, + remove_container=False + ) + del pool.running_containers[dev_name] + except Exception: + # Error is logged but not re-raised + pass + except Exception as e: + pytest.fail(f"Cleanup error should be handled gracefully, but got: {e}") + + # Container should still be tracked since cleanup failed + assert dev_name in pool.running_containers
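
A minimal usage sketch for the new `reset_git_state` helper that the workspace-reuse path relies on (`sync_workspace(clean_untracked=True)` calls it via `_reset_workspace_git_state`). This assumes GitPython and an importable `devs_common` package; the throwaway repository, file names, and commit message below are illustrative and not part of the change:

```python
import shutil
import tempfile
from pathlib import Path

from git import Actor, Repo

from devs_common.utils.git_utils import reset_git_state

# Illustrative throwaway repository (stand-in for a reused workspace).
repo_dir = Path(tempfile.mkdtemp())
repo = Repo.init(repo_dir)

# Commit one tracked file so HEAD exists for the hard reset.
(repo_dir / "tracked.txt").write_text("v1\n")
repo.index.add(["tracked.txt"])
author = Actor("Example", "example@example.com")
repo.index.commit("initial commit", author=author, committer=author)

# Simulate leftovers from a previous task: a dirty tracked file and an untracked file.
(repo_dir / "tracked.txt").write_text("local modification\n")
(repo_dir / "leftover.txt").write_text("stale output\n")

# reset_git_state() hard-resets tracked files to HEAD and removes untracked files (git clean -fd).
assert reset_git_state(repo_dir) is True
assert (repo_dir / "tracked.txt").read_text() == "v1\n"
assert not (repo_dir / "leftover.txt").exists()

shutil.rmtree(repo_dir)
```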