diff --git a/pyproject.toml b/pyproject.toml
index 3c53086..94d4b71 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -85,6 +85,7 @@ swe-agent = [
"unidiff>=0.7",
]
gcp = [
+ "cooperbench",
"google-cloud-batch>=0.17",
"google-cloud-compute>=1.0",
"google-cloud-storage>=2.0",
@@ -186,7 +187,7 @@ exclude_lines = [
]
[tool.uv.sources]
-cooperbench = { workspace = true }
openhands-sdk = { path = "src/cooperbench/agents/openhands_agent_sdk/openhands-sdk", editable = true }
openhands-tools = { path = "src/cooperbench/agents/openhands_agent_sdk/openhands-tools", editable = true }
openhands-workspace = { path = "src/cooperbench/agents/openhands_agent_sdk/openhands-workspace", editable = true }
+cooperbench = { workspace = true }
diff --git a/src/cooperbench/agents/__init__.py b/src/cooperbench/agents/__init__.py
index 6ad8c62..631f169 100644
--- a/src/cooperbench/agents/__init__.py
+++ b/src/cooperbench/agents/__init__.py
@@ -104,6 +104,7 @@ def run(
# Add your agent's shorthand here when registering a new adapter
AGENT_SHORTHANDS = {
"mini_swe_agent": "msa",
+ "mini_swe_agent_v2": "msa_v2",
"swe_agent": "sw",
"openhands_sdk": "oh",
}
diff --git a/src/cooperbench/agents/mini_swe_agent/environments/modal.py b/src/cooperbench/agents/mini_swe_agent/environments/modal.py
index 714b2c2..42e54b7 100644
--- a/src/cooperbench/agents/mini_swe_agent/environments/modal.py
+++ b/src/cooperbench/agents/mini_swe_agent/environments/modal.py
@@ -69,8 +69,7 @@ def _get_or_build_image(image_name: str) -> modal.Image:
if image_name in _image_cache:
return _image_cache[image_name]
- # Build and cache (CACHE_BUST env forces new image hash) TODO: @arpan remove this after testing
- image = modal.Image.from_registry(image_name).entrypoint([]).env({"COOPERBENCH_CACHE": "v6"})
+ image = modal.Image.from_registry(image_name).entrypoint([])
_image_cache[image_name] = image
return image
diff --git a/src/cooperbench/agents/mini_swe_agent_v2/__init__.py b/src/cooperbench/agents/mini_swe_agent_v2/__init__.py
new file mode 100644
index 0000000..207db03
--- /dev/null
+++ b/src/cooperbench/agents/mini_swe_agent_v2/__init__.py
@@ -0,0 +1,102 @@
+"""
+mini-swe-agent v2 - A Minimal AI Agent for Software Engineering (Tool-calling version)
+
+Source: https://github.com/SWE-agent/mini-swe-agent
+Version: 2.1.0 (commit 56613dd)
+License: MIT
+Copyright (c) 2025 Kilian A. Lieret and Carlos E. Jimenez
+
+This code is copied directly from mini-swe-agent with modifications:
+- Import paths changed from 'minisweagent' to 'cooperbench.agents.mini_swe_agent_v2'
+- Removed text-based (regex) action parsing (v1 compat), keeping only tool calling
+- Removed interactive agent, non-Docker environments, non-litellm models
+- Added inter-agent communication (messaging + git connectors) for CooperBench
+
+Citation:
+ @misc{minisweagent2025,
+ title={mini-swe-agent: A Minimal AI Agent for Software Engineering},
+ author={Lieret, Kilian A. and Jimenez, Carlos E.},
+ year={2025},
+ url={https://github.com/SWE-agent/mini-swe-agent}
+ }
+
+This file provides:
+- Path settings for global config file & relative directories
+- Version numbering
+- Protocols for the core components of mini-swe-agent.
+"""
+
+__version__ = "2.1.0"
+
+import os
+from pathlib import Path
+from typing import Any, Protocol
+
+import dotenv
+from platformdirs import user_config_dir
+
+from cooperbench.agents.mini_swe_agent_v2.utils.log import logger
+
+package_dir = Path(__file__).resolve().parent
+
+global_config_dir = Path(os.getenv("MSWEA_GLOBAL_CONFIG_DIR") or user_config_dir("mini-swe-agent"))
+global_config_dir.mkdir(parents=True, exist_ok=True)
+global_config_file = Path(global_config_dir) / ".env"
+
+dotenv.load_dotenv(dotenv_path=global_config_file)
+
+
+# === Protocols ===
+# You can ignore them unless you want static type checking.
+
+
+class Model(Protocol):
+ """Protocol for language models."""
+
+ config: Any
+
+ def query(self, messages: list[dict[str, str]], **kwargs) -> dict: ...
+
+ def format_message(self, **kwargs) -> dict: ...
+
+ def format_observation_messages(
+ self, message: dict, outputs: list[dict], template_vars: dict | None = None
+ ) -> list[dict]: ...
+
+ def get_template_vars(self, **kwargs) -> dict[str, Any]: ...
+
+ def serialize(self) -> dict: ...
+
+
+class Environment(Protocol):
+ """Protocol for execution environments."""
+
+ config: Any
+
+ def execute(self, action: dict, cwd: str = "") -> dict[str, Any]: ...
+
+ def get_template_vars(self, **kwargs) -> dict[str, Any]: ...
+
+ def serialize(self) -> dict: ...
+
+
+class Agent(Protocol):
+ """Protocol for agents."""
+
+ config: Any
+
+ def run(self, task: str, **kwargs) -> dict: ...
+
+ def save(self, path: Path | None, *extra_dicts) -> dict: ...
+
+
+__all__ = [
+ "Agent",
+ "Model",
+ "Environment",
+ "package_dir",
+ "__version__",
+ "global_config_file",
+ "global_config_dir",
+ "logger",
+]
diff --git a/src/cooperbench/agents/mini_swe_agent_v2/adapter.py b/src/cooperbench/agents/mini_swe_agent_v2/adapter.py
new file mode 100644
index 0000000..c9bcbf7
--- /dev/null
+++ b/src/cooperbench/agents/mini_swe_agent_v2/adapter.py
@@ -0,0 +1,145 @@
+"""Mini-SWE-Agent v2 adapter for CooperBench.
+
+This adapter wraps the mini-swe-agent v2 framework (tool-calling version)
+to conform to the AgentRunner interface used by CooperBench.
+"""
+
+import yaml
+
+from cooperbench.agents import AgentResult
+from cooperbench.agents.mini_swe_agent_v2.agents.default import DefaultAgent
+from cooperbench.agents.mini_swe_agent_v2.config import get_config_path
+from cooperbench.agents.mini_swe_agent_v2.connectors import GitConnector
+from cooperbench.agents.mini_swe_agent_v2.connectors.messaging import MessagingConnector
+from cooperbench.agents.mini_swe_agent_v2.models.litellm_model import LitellmModel
+from cooperbench.agents.mini_swe_agent_v2.models.utils.actions_toolcall import SEND_MESSAGE_TOOL
+from cooperbench.agents.registry import register
+
+
+@register("mini_swe_agent_v2")
+class MiniSweAgentV2Runner:
+ """Adapter for mini-swe-agent v2 framework (tool-calling)."""
+
+ def run(
+ self,
+ task: str,
+ image: str,
+ *,
+ agent_id: str = "agent",
+ model_name: str = "gpt-4o",
+ agents: list[str] | None = None,
+ comm_url: str | None = None,
+ git_server_url: str | None = None,
+ git_enabled: bool = False,
+ messaging_enabled: bool = True,
+ config: dict | None = None,
+ agent_config: str | None = None,
+ log_dir: str | None = None,
+ ) -> AgentResult:
+ """Run mini-swe-agent v2 on a task."""
+ # Always load default config, then merge with any overrides
+ config_path = get_config_path("mini")
+ with open(config_path) as f:
+ default_config = yaml.safe_load(f)
+
+ # Merge passed config overrides into default config
+ if config is not None:
+ default_config.update(config)
+
+ agent_cfg = default_config.get("agent", {})
+ model_cfg = default_config.get("model", {})
+ env_cfg = default_config.get("environment", {})
+ backend = default_config.get("backend", "modal")
+
+ # Create environment based on backend
+ env_kwargs = {
+ "image": image,
+ "cwd": "/workspace/repo",
+ "timeout": 3600,
+ }
+ if env_cfg.get("env"):
+ env_kwargs["env"] = env_cfg["env"]
+
+ if backend == "docker":
+ from cooperbench.agents.mini_swe_agent_v2.environments.docker import DockerEnvironment
+
+ if config and config.get("git_network"):
+ env_kwargs["network"] = config["git_network"]
+ env = DockerEnvironment(**env_kwargs)
+ else:
+ from cooperbench.agents.mini_swe_agent_v2.environments.modal import ModalEnvironment
+
+ env = ModalEnvironment(**env_kwargs)
+
+ # Capture base commit for patch generation
+ base_commit_result = env.execute({"command": "git rev-parse HEAD"})
+ base_commit = base_commit_result.get("output", "").strip()
+
+ # Setup messaging connector if enabled
+ comm = None
+ use_messaging = messaging_enabled and comm_url and agents and len(agents) > 1
+ if use_messaging:
+ comm = MessagingConnector(agent_id=agent_id, agents=agents, url=comm_url)
+
+ # Create LLM model with send_message tool if messaging is enabled
+ extra_tools = [SEND_MESSAGE_TOOL] if use_messaging else None
+ model = LitellmModel(model_name=model_name, extra_tools=extra_tools, **model_cfg)
+
+ # Setup git connector if enabled
+ if git_enabled and git_server_url and agents:
+ git_connector = GitConnector(
+ agent_id=agent_id,
+ agents=agents,
+ server_url=git_server_url,
+ )
+ git_connector.setup(env)
+
+ # Create agent with template variables for collaboration
+ extra_vars = {
+ "agent_id": agent_id if (agents and len(agents) > 1) else None,
+ "agents": agents if agents else [],
+ "git_enabled": git_enabled,
+ "messaging_enabled": messaging_enabled,
+ }
+
+ agent = DefaultAgent(
+ model=model,
+ env=env,
+ comm=comm,
+ agent_id=agent_id,
+ **agent_cfg,
+ )
+ agent.extra_template_vars.update(extra_vars)
+
+ # Run agent
+ error_msg = None
+ try:
+ result = agent.run(task=task)
+ status = result.get("exit_status", "Submitted")
+ except Exception as e:
+ status = "Error"
+ error_msg = str(e)
+
+ # Extract patch (committed + uncommitted changes)
+ patch = self._get_patch(env, base_commit)
+
+ # Cleanup
+ env.cleanup()
+
+ return AgentResult(
+ status=status,
+ patch=patch,
+ cost=agent.cost,
+ steps=agent.n_calls,
+ messages=agent.messages,
+ sent_messages=agent.sent_messages,
+ error=error_msg,
+ )
+
+ def _get_patch(self, env, base_commit: str) -> str:
+ """Extract git diff from base commit to current working tree state."""
+ try:
+ result = env.execute({"command": f"git diff {base_commit}"})
+ return result.get("output", "").strip()
+ except Exception:
+ return ""
diff --git a/src/cooperbench/agents/mini_swe_agent_v2/agents/__init__.py b/src/cooperbench/agents/mini_swe_agent_v2/agents/__init__.py
new file mode 100644
index 0000000..d2df172
--- /dev/null
+++ b/src/cooperbench/agents/mini_swe_agent_v2/agents/__init__.py
@@ -0,0 +1 @@
+"""Agent implementations for mini-SWE-agent v2."""
diff --git a/src/cooperbench/agents/mini_swe_agent_v2/agents/default.py b/src/cooperbench/agents/mini_swe_agent_v2/agents/default.py
new file mode 100644
index 0000000..75e2f47
--- /dev/null
+++ b/src/cooperbench/agents/mini_swe_agent_v2/agents/default.py
@@ -0,0 +1,205 @@
+"""Basic agent class. See https://mini-swe-agent.com/latest/advanced/control_flow/ for visual explanation
+or https://minimal-agent.com for a tutorial on the basic building principles.
+"""
+
+import json
+import logging
+import traceback
+from pathlib import Path
+
+from jinja2 import StrictUndefined, Template
+from pydantic import BaseModel
+
+from cooperbench.agents.mini_swe_agent_v2 import Environment, Model, __version__
+from cooperbench.agents.mini_swe_agent_v2.connectors.messaging import MessagingConnector
+from cooperbench.agents.mini_swe_agent_v2.exceptions import InterruptAgentFlow, LimitsExceeded
+from cooperbench.agents.mini_swe_agent_v2.utils.serialize import recursive_merge
+
+
+class AgentConfig(BaseModel):
+ """Check the config files in config/ for example settings."""
+
+ system_template: str
+ """Template for the system message (the first message)."""
+ instance_template: str
+ """Template for the first user message specifying the task (the second message overall)."""
+ step_limit: int = 0
+ """Maximum number of steps the agent can take."""
+ cost_limit: float = 3.0
+ """Stop agent after exceeding (!) this cost."""
+ output_path: Path | None = None
+ """Save the trajectory to this path."""
+
+
+class DefaultAgent:
+ def __init__(
+ self,
+ model: Model,
+ env: Environment,
+ *,
+ comm: MessagingConnector | None = None,
+ agent_id: str = "agent",
+ config_class: type = AgentConfig,
+ **kwargs,
+ ):
+ """See the `AgentConfig` class for permitted keyword arguments."""
+ self.config = config_class(**kwargs)
+ self.messages: list[dict] = []
+ self.model = model
+ self.env = env
+ self.comm = comm
+ self.agent_id = agent_id
+ self.extra_template_vars = {}
+ self.logger = logging.getLogger("agent")
+ self.cost = 0.0
+ self.n_calls = 0
+ self.sent_messages: list[dict] = []
+
+ def log(self, msg: str):
+ """Log message with agent prefix."""
+ self.logger.debug(f"[{self.agent_id}] {msg}")
+
+ def get_template_vars(self, **kwargs) -> dict:
+ return recursive_merge(
+ self.config.model_dump(),
+ self.env.get_template_vars(),
+ self.model.get_template_vars(),
+ {"n_model_calls": self.n_calls, "model_cost": self.cost},
+ self.extra_template_vars,
+ kwargs,
+ )
+
+ def _render_template(self, template: str) -> str:
+ return Template(template, undefined=StrictUndefined).render(**self.get_template_vars())
+
+ def add_messages(self, *messages: dict) -> list[dict]:
+ self.logger.debug(messages) # set log level to debug to see
+ self.messages.extend(messages)
+ return list(messages)
+
+ def handle_uncaught_exception(self, e: Exception) -> list[dict]:
+ return self.add_messages(
+ self.model.format_message(
+ role="exit",
+ content=str(e),
+ extra={
+ "exit_status": type(e).__name__,
+ "submission": "",
+ "exception_str": str(e),
+ "traceback": traceback.format_exc(),
+ },
+ )
+ )
+
+ def run(self, task: str = "", **kwargs) -> dict:
+ """Run step() until agent is finished. Returns dictionary with exit_status, submission keys."""
+ self.extra_template_vars |= {"task": task, **kwargs}
+ self.messages = []
+ self.add_messages(
+ self.model.format_message(role="system", content=self._render_template(self.config.system_template)),
+ self.model.format_message(role="user", content=self._render_template(self.config.instance_template)),
+ )
+ while True:
+ try:
+ self.step()
+ except InterruptAgentFlow as e:
+ self.add_messages(*e.messages)
+ except Exception as e:
+ self.handle_uncaught_exception(e)
+ raise
+ finally:
+ self.save(self.config.output_path)
+ if self.messages[-1].get("role") == "exit":
+ break
+ return self.messages[-1].get("extra", {})
+
+ def step(self) -> list[dict]:
+ """Query the LM, execute actions. Polls for inter-agent messages before querying."""
+ # Check for inter-agent messages before querying LLM
+ if self.comm:
+ messages = self.comm.receive()
+ for msg in messages:
+ ts = msg.get("timestamp", "")[:19].replace("T", " ")
+ self.log(f"INBOX: [{msg['from']} @ {ts}] {msg['content']}")
+ self.add_messages(
+ self.model.format_message(
+ role="user",
+ content=f"[Message from {msg['from']}]: {msg['content']}",
+ )
+ )
+ return self.execute_actions(self.query())
+
+ def query(self) -> dict:
+ """Query the model and return model messages. Override to add hooks."""
+ if 0 < self.config.step_limit <= self.n_calls or 0 < self.config.cost_limit <= self.cost:
+ raise LimitsExceeded(
+ {
+ "role": "exit",
+ "content": "LimitsExceeded",
+ "extra": {"exit_status": "LimitsExceeded", "submission": ""},
+ }
+ )
+ self.n_calls += 1
+ message = self.model.query(self.messages)
+ self.cost += message.get("extra", {}).get("cost", 0.0)
+ self.add_messages(message)
+ return message
+
+ def execute_actions(self, message: dict) -> list[dict]:
+ """Execute actions in message, add observation messages, return them.
+
+ Handles both bash and send_message tool calls.
+ """
+ actions = message.get("extra", {}).get("actions", [])
+ outputs = []
+ for action in actions:
+ tool_name = action.get("tool_name", "bash")
+ if tool_name == "send_message" and self.comm:
+ output = self._handle_send_message(action)
+ else:
+ outputs.append(self.env.execute(action))
+ continue
+ outputs.append(output)
+ return self.add_messages(*self.model.format_observation_messages(message, outputs, self.get_template_vars()))
+
+ def _handle_send_message(self, action: dict) -> dict:
+ """Handle a send_message tool call via the messaging connector."""
+ recipient = action.get("recipient", "")
+ content = action.get("content", "")
+ self.comm.send(recipient, content)
+ self.log(f"SENT to {recipient}: {content[:80]}...")
+ self.sent_messages.append({"to": recipient, "content": content})
+ return {"output": f"Message sent to {recipient}", "returncode": 0}
+
+ def serialize(self, *extra_dicts) -> dict:
+ """Serialize agent state to a json-compatible nested dictionary for saving."""
+ last_message = self.messages[-1] if self.messages else {}
+ last_extra = last_message.get("extra", {})
+ agent_data = {
+ "info": {
+ "model_stats": {
+ "instance_cost": self.cost,
+ "api_calls": self.n_calls,
+ },
+ "config": {
+ "agent": self.config.model_dump(mode="json"),
+ "agent_type": f"{self.__class__.__module__}.{self.__class__.__name__}",
+ },
+ "mini_version": __version__,
+ "exit_status": last_extra.get("exit_status", ""),
+ "submission": last_extra.get("submission", ""),
+ },
+ "messages": self.messages,
+ "trajectory_format": "mini-swe-agent-1.1",
+ }
+ return recursive_merge(agent_data, self.model.serialize(), self.env.serialize(), *extra_dicts)
+
+ def save(self, path: Path | None, *extra_dicts) -> dict:
+ """Save the trajectory of the agent to a file if path is given. Returns full serialized data.
+ You can pass additional dictionaries with extra data to be (recursively) merged into the output data.
+ """
+ data = self.serialize(*extra_dicts)
+ if path:
+ path.parent.mkdir(parents=True, exist_ok=True)
+ path.write_text(json.dumps(data, indent=2))
+ return data
diff --git a/src/cooperbench/agents/mini_swe_agent_v2/config/__init__.py b/src/cooperbench/agents/mini_swe_agent_v2/config/__init__.py
new file mode 100644
index 0000000..54ef179
--- /dev/null
+++ b/src/cooperbench/agents/mini_swe_agent_v2/config/__init__.py
@@ -0,0 +1,62 @@
+"""Configuration files and utilities for mini-SWE-agent."""
+
+import json
+import os
+from pathlib import Path
+
+import yaml
+
+builtin_config_dir = Path(__file__).parent
+
+
+def get_config_path(config_spec: str | Path) -> Path:
+ """Get the path to a config file."""
+ config_spec = Path(config_spec)
+ if config_spec.suffix != ".yaml":
+ config_spec = config_spec.with_suffix(".yaml")
+ candidates = [
+ Path(config_spec),
+ Path(os.getenv("MSWEA_CONFIG_DIR", ".")) / config_spec,
+ builtin_config_dir / config_spec,
+ builtin_config_dir / "extra" / config_spec,
+ builtin_config_dir / "benchmarks" / config_spec,
+ ]
+ for candidate in candidates:
+ if candidate.exists():
+ return candidate
+
+ raise FileNotFoundError(f"Could not find config file for {config_spec} (tried: {candidates})")
+
+
+def _key_value_spec_to_nested_dict(config_spec: str) -> dict:
+ """Interpret key-value specs from the command line.
+
+ Example:
+
+ "model.model_name=anthropic/claude-sonnet-4-5-20250929" ->
+ {"model": {"model_name": "anthropic/claude-sonnet-4-5-20250929"}}
+ """
+ key, value = config_spec.split("=", 1)
+ try:
+ value = json.loads(value)
+ except json.JSONDecodeError:
+ pass
+ keys = key.split(".")
+ result = {}
+ current = result
+ for k in keys[:-1]:
+ current[k] = {}
+ current = current[k]
+ current[keys[-1]] = value
+ return result
+
+
+def get_config_from_spec(config_spec: str | Path) -> dict:
+ """Get a config from a config spec."""
+ if isinstance(config_spec, str) and "=" in config_spec:
+ return _key_value_spec_to_nested_dict(config_spec)
+ path = get_config_path(config_spec)
+ return yaml.safe_load(path.read_text())
+
+
+__all__ = ["builtin_config_dir", "get_config_path", "get_config_from_spec", "_key_value_spec_to_nested_dict"]
diff --git a/src/cooperbench/agents/mini_swe_agent_v2/config/mini.yaml b/src/cooperbench/agents/mini_swe_agent_v2/config/mini.yaml
new file mode 100644
index 0000000..b52ad8b
--- /dev/null
+++ b/src/cooperbench/agents/mini_swe_agent_v2/config/mini.yaml
@@ -0,0 +1,147 @@
+agent:
+ system_template: |
+ You are a helpful assistant that can interact with a computer.
+ instance_template: |
+ Please solve this issue: {{task}}
+
+ You can execute bash commands and edit files to implement the necessary changes.
+
+ ## Recommended Workflow
+
+ This workflow should be done step-by-step so that you can iterate on your changes and any possible problems.
+
+ 1. Analyze the codebase by finding and reading relevant files
+ 2. Create a script to reproduce the issue
+ 3. Edit the source code to resolve the issue
+ 4. Verify your fix works by running your script again
+ 5. Test edge cases to ensure your fix is robust
+ 6. Submit your changes and finish your work by issuing the following command: `echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT`.
+ Do not combine it with any other command. After this command, you cannot continue working on this task.
+
+ ## Command Execution Rules
+
+ You are operating in an environment where
+
+ 1. You issue at least one command
+ 2. The system executes the command(s) in a subshell
+ 3. You see the result(s)
+ 4. You write your next command(s)
+
+ Each response should include:
+
+ 1. **Reasoning text** where you explain your analysis and plan
+ 2. At least one tool call with your command
+
+ **CRITICAL REQUIREMENTS:**
+
+ - Your response SHOULD include reasoning text explaining what you're doing
+ - Your response MUST include AT LEAST ONE bash tool call
+ - Directory or environment variable changes are not persistent. Every action is executed in a new subshell.
+ - However, you can prefix any action with `MY_ENV_VAR=MY_VALUE cd /path/to/working/dir && ...` or write/load environment variables from files
+ - Submit your changes and finish your work by issuing the following command: `echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT`.
+ Do not combine it with any other command. After this command, you cannot continue working on this task.
+
+ Example of a CORRECT response:
+
+ I need to understand the structure of the repository first. Let me check what files are in the current directory to get a better understanding of the codebase.
+
+ [Makes bash tool call with {"command": "ls -la"} as arguments]
+
+
+
+ {{system}} {{release}} {{version}} {{machine}}
+
+
+ ## Useful command examples
+
+ ### Create a new file:
+
+ ```bash
+ cat <<'EOF' > newfile.py
+ import numpy as np
+ hello = "world"
+ print(hello)
+ EOF
+ ```
+
+ ### Edit files with sed:
+
+ {%- if system == "Darwin" -%}
+
+ You are on MacOS. For all the below examples, you need to use `sed -i ''` instead of `sed -i`.
+
+ {%- endif -%}
+
+ ```bash
+ # Replace all occurrences
+ sed -i 's/old_string/new_string/g' filename.py
+
+ # Replace only first occurrence
+ sed -i 's/old_string/new_string/' filename.py
+
+ # Replace first occurrence on line 1
+ sed -i '1s/old_string/new_string/' filename.py
+
+ # Replace all occurrences in lines 1-10
+ sed -i '1,10s/old_string/new_string/g' filename.py
+ ```
+
+ ### View file content:
+
+ ```bash
+ # View specific lines with numbers
+ nl -ba filename.py | sed -n '10,20p'
+ ```
+
+ ### Any other command you want to run
+
+ ```bash
+ anything
+ ```
+ step_limit: 0
+ cost_limit: 3.
+ mode: confirm
+environment:
+ env:
+ PAGER: cat
+ MANPAGER: cat
+ LESS: -R
+ PIP_PROGRESS_BAR: 'off'
+ TQDM_DISABLE: '1'
+model:
+ observation_template: |
+ {%- if output.output | length < 10000 -%}
+ {
+ "returncode": {{ output.returncode }},
+ "output": {{ output.output | tojson }}
+ {%- if output.exception_info %}, "exception_info": {{ output.exception_info | tojson }}{% endif %}
+ }
+ {%- else -%}
+ {
+ "returncode": {{ output.returncode }},
+ "output_head": {{ output.output[:5000] | tojson }},
+ "output_tail": {{ output.output[-5000:] | tojson }},
+ "elided_chars": {{ output.output | length - 10000 }},
+ "warning": "Output too long."
+ {%- if output.exception_info %}, "exception_info": {{ output.exception_info | tojson }}{% endif %}
+ }
+ {%- endif -%}
+ format_error_template: |
+ Tool call error:
+
+
+ {{error}}
+
+
+ Here is general guidance on how to submit correct toolcalls:
+
+ Every response needs to use the 'bash' tool at least once to execute commands.
+
+ Call the bash tool with your command as the argument:
+ - Tool: bash
+ - Arguments: {"command": "your_command_here"}
+
+ If you want to end the task, please issue the following command: `echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT`
+ without any other command.
+ model_kwargs:
+ drop_params: true
diff --git a/src/cooperbench/agents/mini_swe_agent_v2/connectors/__init__.py b/src/cooperbench/agents/mini_swe_agent_v2/connectors/__init__.py
new file mode 100644
index 0000000..adc30bb
--- /dev/null
+++ b/src/cooperbench/agents/mini_swe_agent_v2/connectors/__init__.py
@@ -0,0 +1,21 @@
+"""Connectors for inter-agent communication."""
+
+from cooperbench.agents.mini_swe_agent_v2.connectors.git import GitConnector
+from cooperbench.agents.mini_swe_agent_v2.connectors.git_servers import (
+ DockerGitServer,
+ GCPGitServer,
+ GitServer,
+ ModalGitServer,
+ create_git_server,
+)
+from cooperbench.agents.mini_swe_agent_v2.connectors.messaging import MessagingConnector
+
+__all__ = [
+ "DockerGitServer",
+ "GCPGitServer",
+ "GitConnector",
+ "GitServer",
+ "MessagingConnector",
+ "ModalGitServer",
+ "create_git_server",
+]
diff --git a/src/cooperbench/agents/mini_swe_agent_v2/connectors/git.py b/src/cooperbench/agents/mini_swe_agent_v2/connectors/git.py
new file mode 100644
index 0000000..b0781be
--- /dev/null
+++ b/src/cooperbench/agents/mini_swe_agent_v2/connectors/git.py
@@ -0,0 +1,132 @@
+"""Git-based code sharing between agents.
+
+Enables agents in separate containers to share code via git push/pull.
+Uses a shared git server sandbox that agents connect to as a remote.
+
+Architecture:
+ +---------------------------------------------------------+
+ | Git Server Sandbox |
+ | git daemon --enable=receive-pack (bare repo) |
+ +---------------------------------------------------------+
+ ^
+ +----------------+----------------+
+ | | |
+ git push git fetch git push
+ git pull git pull
+ | | |
+ +---------v----+ +---------v----+
+ | Agent A | | Agent B |
+ | sandbox | | sandbox |
+ +--------------+ +--------------+
+
+Example:
+ # Create shared git server (once per task)
+ from cooperbench.agents.mini_swe_agent_v2.connectors.git_servers import get_git_server
+
+ GitServerClass = get_git_server("docker") # or "modal"
+ git_server = GitServerClass.create(run_id="abc123")
+
+ # Create connector for each agent
+ git = GitConnector(
+ agent_id="agent1",
+ agents=["agent1", "agent2"],
+ server_url=git_server.url
+ )
+
+ # Configure agent's sandbox
+ git.setup(env)
+
+ # Agent can now use git normally:
+ # git push team agent1
+ # git fetch team
+ # git merge team/agent2
+"""
+
+from __future__ import annotations
+
+import logging
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+ from cooperbench.agents.mini_swe_agent_v2.environments.docker import DockerEnvironment
+
+
+class GitConnector:
+ """Configures an agent's sandbox for git collaboration.
+
+ After setup(), the agent can use standard git commands:
+ - git push team - share changes
+ - git fetch team - get other agents' branches
+ - git merge team/ - merge another agent's work
+ - git branch -r - list remote branches
+ """
+
+ # Remote name used in agent's git config
+ REMOTE_NAME = "team"
+
+ def __init__(
+ self,
+ agent_id: str,
+ agents: list[str],
+ server_url: str,
+ ):
+ """Initialize git connector.
+
+ Args:
+ agent_id: This agent's unique identifier (e.g., "agent1")
+ agents: List of all agent IDs in the collaboration
+ server_url: Git server URL from GitServer.url
+ """
+ self.agent_id = agent_id
+ self.agents = agents
+ self.server_url = server_url
+ self._logger = logging.getLogger("cooperbench.agents.mini_swe_agent_v2.git_connector")
+ self._initialized = False
+
+ def _exec(self, env: DockerEnvironment, command: str) -> dict:
+ """Execute a command in the environment (v2 uses dict-based actions)."""
+ return env.execute({"command": command})
+
+ def setup(self, env: DockerEnvironment) -> None:
+ """Configure git remote in the agent's sandbox.
+
+ This sets up the 'team' remote pointing to the shared git server,
+ creates an agent-specific branch, and pushes the initial state.
+
+ Args:
+ env: The agent's Docker environment
+
+ Raises:
+ RuntimeError: If git configuration fails
+ """
+ self._logger.debug(f"Setting up git for {self.agent_id}")
+
+ # Configure git user (needed for commits)
+ self._exec(env, 'git config user.email "agent@cooperbench.local"')
+ self._exec(env, f'git config user.name "{self.agent_id}"')
+
+ # Add shared remote
+ result = self._exec(env, f"git remote add {self.REMOTE_NAME} {self.server_url}")
+ if result.get("returncode", 0) != 0:
+ # Remote might already exist
+ self._exec(env, f"git remote set-url {self.REMOTE_NAME} {self.server_url}")
+
+ # Create agent's branch
+ self._exec(env, f"git checkout -b {self.agent_id}")
+
+ # Push initial state (first agent initializes the server)
+ # Use --force in case branch exists from a previous run
+ result = self._exec(env, f"git push -u {self.REMOTE_NAME} {self.agent_id} --force")
+ if result.get("returncode", 0) != 0:
+ self._logger.warning(f"Initial push failed: {result.get('output', '')}")
+
+ # Also push main/master as base reference
+ self._exec(env, f"git push {self.REMOTE_NAME} HEAD:refs/heads/main --force 2>/dev/null || true")
+
+ self._initialized = True
+ self._logger.debug(f"Git setup complete for {self.agent_id}")
+
+ @property
+ def is_initialized(self) -> bool:
+ """Whether setup() has been called."""
+ return self._initialized
diff --git a/src/cooperbench/agents/mini_swe_agent_v2/connectors/git_servers/__init__.py b/src/cooperbench/agents/mini_swe_agent_v2/connectors/git_servers/__init__.py
new file mode 100644
index 0000000..6f4b765
--- /dev/null
+++ b/src/cooperbench/agents/mini_swe_agent_v2/connectors/git_servers/__init__.py
@@ -0,0 +1,89 @@
+"""Git servers for inter-agent code collaboration.
+
+Provides pluggable backends for hosting shared git repositories:
+- Modal: Cloud-based sandboxes (default)
+- Docker: Local containers
+- GCP: Google Cloud Platform VMs
+"""
+
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from cooperbench.agents.mini_swe_agent_v2.connectors.git_servers.base import GitServer
+from cooperbench.agents.mini_swe_agent_v2.connectors.git_servers.docker import DockerGitServer
+from cooperbench.agents.mini_swe_agent_v2.connectors.git_servers.gcp import GCPGitServer
+from cooperbench.agents.mini_swe_agent_v2.connectors.git_servers.modal import ModalGitServer
+
+if TYPE_CHECKING:
+ import modal
+
+__all__ = ["GitServer", "ModalGitServer", "DockerGitServer", "GCPGitServer", "create_git_server"]
+
+
+def create_git_server(
+ backend: str,
+ run_id: str,
+ *,
+ app: modal.App | None = None,
+ timeout: int = 3600,
+ # GCP-specific options
+ project_id: str | None = None,
+ zone: str = "us-central1-a",
+ machine_type: str = "e2-micro",
+ network: str | None = None,
+) -> ModalGitServer | DockerGitServer | GCPGitServer:
+ """Create a git server for the specified backend.
+
+ Args:
+ backend: Backend name ("modal", "docker", or "gcp")
+ run_id: Unique run identifier
+ app: Modal app (required for modal backend)
+ timeout: Server timeout in seconds
+ project_id: GCP project ID (gcp backend only)
+ zone: GCP zone (gcp backend only, default: us-central1-a)
+ machine_type: GCP machine type (gcp backend only, default: e2-micro)
+ network: VPC network name (gcp backend only, for agent connectivity)
+
+ Returns:
+ Git server instance ready to accept connections
+
+ Example:
+ # Docker backend
+ server = create_git_server("docker", run_id="my-run")
+
+ # Modal backend
+ app = modal.App.lookup("cooperbench", create_if_missing=True)
+ server = create_git_server("modal", run_id="my-run", app=app)
+
+ # GCP backend
+ server = create_git_server(
+ "gcp",
+ run_id="my-run",
+ project_id="my-project",
+ network="cooperbench-vpc",
+ )
+
+ # Use the server
+ print(server.url)
+ # ... agents push/pull ...
+ server.cleanup()
+ """
+ if backend == "docker":
+ return DockerGitServer.create(run_id=run_id, timeout=timeout)
+ elif backend == "modal":
+ if app is None:
+ raise ValueError("Modal backend requires 'app' parameter")
+ return ModalGitServer.create(app=app, run_id=run_id, timeout=timeout)
+ elif backend == "gcp":
+ return GCPGitServer.create(
+ run_id=run_id,
+ project_id=project_id,
+ zone=zone,
+ machine_type=machine_type,
+ network=network,
+ timeout=timeout,
+ )
+ else:
+ available = "docker, modal, gcp"
+ raise ValueError(f"Unknown git server backend: '{backend}'. Available: {available}")
diff --git a/src/cooperbench/agents/mini_swe_agent_v2/connectors/git_servers/base.py b/src/cooperbench/agents/mini_swe_agent_v2/connectors/git_servers/base.py
new file mode 100644
index 0000000..2dc4e9a
--- /dev/null
+++ b/src/cooperbench/agents/mini_swe_agent_v2/connectors/git_servers/base.py
@@ -0,0 +1,24 @@
+"""Base protocol for git servers."""
+
+from typing import Protocol
+
+
class GitServer(Protocol):
    """Structural interface for a shared git server.

    Implementations (Docker container, Modal sandbox, GCP VM, ...) expose a
    single repository that collaborating agents use as a common remote for
    pushing and pulling. Any object with these members satisfies the protocol.
    """

    @property
    def url(self) -> str:
        """Remote URL of the shared repository.

        Returns:
            Git URL of the repository, e.g. ``git://hostname:port/repo.git``.
        """
        ...

    def cleanup(self) -> None:
        """Release every resource backing the server (container/sandbox/VM)."""
        ...
diff --git a/src/cooperbench/agents/mini_swe_agent_v2/connectors/git_servers/docker.py b/src/cooperbench/agents/mini_swe_agent_v2/connectors/git_servers/docker.py
new file mode 100644
index 0000000..8e32b2e
--- /dev/null
+++ b/src/cooperbench/agents/mini_swe_agent_v2/connectors/git_servers/docker.py
@@ -0,0 +1,175 @@
+"""Docker-based git server for code collaboration."""
+
+from __future__ import annotations
+
+import logging
+import time
+
+import docker
+
+
class DockerGitServer:
    """Shared git server container for code collaboration using Docker.

    Creates a Docker container running git-daemon that agents can push/pull to.
    A dedicated bridge network is created per run so agent containers can reach
    the daemon by IP or container name; cleanup() removes both container and
    network (and create() now removes them on any setup failure too).
    """

    def __init__(self, container, hostname: str, port: int, network_name: str):
        """Initialize with an existing container.

        Use DockerGitServer.create() to create a new server.

        Args:
            container: Running docker-py container object hosting git-daemon
            hostname: Hostname/IP agents use to reach the daemon
            port: TCP port of the git daemon (9418)
            network_name: Docker bridge network shared with agent containers
        """
        self._container = container
        self._hostname = hostname
        self._port = port
        self._network_name = network_name
        self._logger = logging.getLogger("cooperbench.agents.mini_swe_agent_v2.git_server.docker")

    @classmethod
    def create(
        cls,
        run_id: str,
        timeout: int = 3600,
    ) -> DockerGitServer:
        """Create and start a git server container.

        Args:
            run_id: Unique run identifier (for container naming)
            timeout: Container timeout in seconds (not enforced, for compatibility)

        Returns:
            DockerGitServer instance ready to accept connections

        Raises:
            RuntimeError: If the container fails to start or the git daemon
                port mapping cannot be determined. Partially created resources
                (container AND per-run network) are removed before raising.
        """
        logger = logging.getLogger("cooperbench.agents.mini_swe_agent_v2.git_server.docker")
        logger.debug(f"Creating docker git server for run {run_id}")

        client = docker.from_env()

        # Use a simple Debian-based image with git
        image = "debian:bookworm-slim"

        # Pull image if not present
        try:
            client.images.get(image)
        except docker.errors.ImageNotFound:
            logger.debug(f"Pulling image {image}")
            client.images.pull(image)

        # Create or get shared network for git server and agents
        network_name = f"cooperbench-git-{run_id}"
        try:
            client.networks.get(network_name)
        except docker.errors.NotFound:
            client.networks.create(network_name, driver="bridge")

        # Container name based on run_id
        container_name = f"cooperbench-git-{run_id}"

        # Remove existing container if it exists
        try:
            old_container = client.containers.get(container_name)
            old_container.remove(force=True)
        except docker.errors.NotFound:
            pass

        # Create and start container with initialization script
        # The script initializes the repo, then starts git daemon in foreground to keep container alive
        init_script = """#!/bin/bash
set -e
apt-get update -qq
apt-get install -y -qq git > /dev/null 2>&1
mkdir -p /git/repo.git
cd /git/repo.git
git init --bare
git config receive.denyCurrentBranch ignore
touch git-daemon-export-ok
exec git daemon --reuseaddr --export-all --enable=receive-pack --base-path=/git --listen=0.0.0.0 /git
"""

        container = client.containers.run(
            image=image,
            command=["bash", "-c", init_script],
            name=container_name,
            detach=True,
            network=network_name,
            ports={"9418/tcp": None},  # Auto-assign port for host access
            remove=False,
        )

        try:
            # Wait for container to start and git daemon to initialize
            time.sleep(3)

            # A single reload refreshes status, port bindings, and network info.
            container.reload()
            if container.status != "running":
                logs = container.logs().decode("utf-8", errors="replace")
                raise RuntimeError(f"Git server container failed to start. Logs: {logs}")

            # Verify the daemon port was published for host access
            port_bindings = container.attrs.get("NetworkSettings", {}).get("Ports", {})
            if "9418/tcp" not in port_bindings or not port_bindings["9418/tcp"]:
                raise RuntimeError("Failed to get port mapping for git daemon")

            # Prefer the container's IP on the shared network for
            # inter-container communication; fall back to the container name
            # (Docker DNS resolves it on the same network).
            networks = container.attrs.get("NetworkSettings", {}).get("Networks", {})
            container_ip = networks.get(network_name, {}).get("IPAddress")
            hostname = container_ip or container_name
        except Exception:
            # BUGFIX: previously only the container was removed on failure,
            # leaking the per-run bridge network. Remove both, best-effort,
            # then re-raise the original error.
            try:
                container.remove(force=True)
            except Exception:
                pass
            try:
                client.networks.get(network_name).remove()
            except Exception:
                pass
            raise

        logger.debug(f"Git server ready at git://{hostname}:9418 (network: {network_name})")

        return cls(container=container, hostname=hostname, port=9418, network_name=network_name)

    @property
    def url(self) -> str:
        """Git URL for agents to use as remote.

        Returns:
            Git URL for the repository (git://hostname:port/repo.git)
        """
        return f"git://{self._hostname}:{self._port}/repo.git"

    @property
    def network_name(self) -> str:
        """Docker network name for agent containers to join."""
        return self._network_name

    def cleanup(self) -> None:
        """Stop and remove the git server container and network (best-effort)."""
        if self._container:
            try:
                self._container.stop(timeout=5)
            except Exception:
                pass
            try:
                self._container.remove(force=True)
            except Exception:
                pass
            self._container = None

        # Clean up network
        if hasattr(self, "_network_name") and self._network_name:
            try:
                client = docker.from_env()
                try:
                    network = client.networks.get(self._network_name)
                    network.remove()
                except docker.errors.NotFound:
                    pass
            except Exception:
                pass
diff --git a/src/cooperbench/agents/mini_swe_agent_v2/connectors/git_servers/gcp.py b/src/cooperbench/agents/mini_swe_agent_v2/connectors/git_servers/gcp.py
new file mode 100644
index 0000000..a3e41f5
--- /dev/null
+++ b/src/cooperbench/agents/mini_swe_agent_v2/connectors/git_servers/gcp.py
@@ -0,0 +1,544 @@
+"""GCP VM-based git server for code collaboration."""
+
+from __future__ import annotations
+
+import logging
+import subprocess
+import time
+import uuid
+
+
class GCPGitServer:
    """Shared git server on GCP VM for code collaboration.

    Creates a GCP VM running git-daemon that agents can push/pull to.
    Supports VPC networking for agents on the same network.

    Example:
        server = GCPGitServer.create(
            run_id="my-run",
            project_id="my-project",
            network="cooperbench-vpc",
        )
        print(server.url)  # git://10.128.0.5:9418/repo.git
        print(server.network_name)  # cooperbench-vpc
        # ... agents push/pull ...
        server.cleanup()
    """

    def __init__(
        self,
        *,
        vm_name: str,
        project_id: str,
        zone: str,
        network: str | None,
        git_url: str,
    ):
        """Initialize with existing VM info.

        Use GCPGitServer.create() to create a new server.

        Args:
            vm_name: Name of the Compute Engine instance
            project_id: GCP project the instance lives in
            zone: GCP zone of the instance
            network: VPC network name, or None when using the external IP
            git_url: git:// URL of the repo (empty until create() fills it in)
        """
        self._vm_name = vm_name
        self._project_id = project_id
        self._zone = zone
        self._network = network
        self._git_url = git_url
        # Track which cloud resources actually exist so cleanup() only deletes
        # what was created; set as soon as the corresponding insert() succeeds.
        self._vm_created = False
        self._firewall_created = False
        # Compute client is created lazily in _get_compute_client().
        self._compute_client = None
        self._logger = logging.getLogger("cooperbench.agents.mini_swe_agent_v2.git_server.gcp")

    @classmethod
    def create(
        cls,
        run_id: str,
        *,
        project_id: str | None = None,
        zone: str = "us-central1-a",
        machine_type: str = "e2-micro",
        network: str | None = None,
        timeout: int = 3600,
    ) -> GCPGitServer:
        """Create and start a git server VM.

        Args:
            run_id: Unique run identifier
            project_id: GCP project ID (defaults to env/gcloud config)
            zone: GCP zone for the VM
            machine_type: VM machine type (e2-micro is smallest/cheapest)
            network: VPC network name for agent connectivity (None for external IP)
            timeout: VM timeout in seconds (not enforced, for reference)

        Returns:
            GCPGitServer instance ready to accept connections
        """
        logger = logging.getLogger("cooperbench.agents.mini_swe_agent_v2.git_server.gcp")
        logger.debug(f"Creating GCP git server for run {run_id}")

        # Resolve project ID
        resolved_project_id = project_id or cls._get_default_project()
        if not resolved_project_id:
            raise ValueError(
                "project_id required. Set via parameter, GOOGLE_CLOUD_PROJECT env var, "
                "or gcloud config set project "
            )

        # Generate unique VM name (sanitize run_id for GCP naming rules)
        sanitized_run_id = run_id.replace("_", "-").lower()[:20]
        vm_name = f"cooperbench-git-{sanitized_run_id}-{uuid.uuid4().hex[:8]}"

        instance = cls(
            vm_name=vm_name,
            project_id=resolved_project_id,
            zone=zone,
            network=network,
            git_url="",  # Will be set after VM is created
        )

        # Create VM and get IP (ordering matters: insert, then SSH/daemon
        # readiness, then firewall, then IP lookup for the final URL)
        instance._create_git_server_vm(machine_type)
        instance._wait_for_ssh()

        # Create firewall rule to allow git protocol traffic
        instance._create_firewall_rule()

        # Get the git URL (internal IP if VPC, external IP otherwise)
        ip_address = instance._get_vm_ip(use_internal=network is not None)
        instance._git_url = f"git://{ip_address}:9418/repo.git"

        logger.debug(f"Git server ready at {instance._git_url}")
        return instance

    @staticmethod
    def _get_default_project() -> str | None:
        """Get default GCP project: env vars, then cooperbench config, then gcloud CLI."""
        import os

        # Try environment variable first
        project = os.environ.get("GOOGLE_CLOUD_PROJECT") or os.environ.get("GCLOUD_PROJECT")
        if project:
            return project

        # Try cooperbench config
        try:
            from cooperbench.config import ConfigManager

            config = ConfigManager()
            project = config.get("gcp_project_id")
            if project:
                return project
        except Exception:
            pass

        # Try gcloud config
        try:
            result = subprocess.run(
                ["gcloud", "config", "get-value", "project"],
                capture_output=True,
                text=True,
                timeout=10,
            )
            if result.returncode == 0 and result.stdout.strip():
                return result.stdout.strip()
        except Exception:
            pass

        return None

    def _get_compute_client(self):
        """Get or create Compute Engine client (lazy, cached per instance)."""
        if self._compute_client is None:
            from google.cloud import compute_v1

            self._compute_client = compute_v1.InstancesClient()
        return self._compute_client

    def _create_git_server_vm(self, machine_type: str):
        """Create the GCP VM with git-daemon."""
        from google.cloud import compute_v1

        self._logger.debug(f"Creating GCP VM {self._vm_name} in {self._zone}")

        client = self._get_compute_client()

        # Startup script to install git and start git-daemon
        startup_script = """#!/bin/bash
set -e

# Install git
apt-get update && apt-get install -y git

# Create bare repo
mkdir -p /git/repo.git
cd /git/repo.git
git init --bare
git config receive.denyCurrentBranch ignore
touch git-daemon-export-ok

# Start git daemon
# --enable=receive-pack allows pushing
# --export-all exports all repos
# --base-path=/git means URL /repo.git maps to /git/repo.git
# --reuseaddr allows quick restarts
git daemon \
    --reuseaddr \
    --export-all \
    --enable=receive-pack \
    --base-path=/git \
    --listen=0.0.0.0 \
    /git &

echo "Git daemon started"
"""

        # Build instance configuration
        instance = compute_v1.Instance()
        instance.name = self._vm_name
        instance.machine_type = f"zones/{self._zone}/machineTypes/{machine_type}"

        # Boot disk with Debian
        disk = compute_v1.AttachedDisk()
        disk.auto_delete = True
        disk.boot = True
        disk.type_ = "PERSISTENT"

        disk_init = compute_v1.AttachedDiskInitializeParams()
        disk_init.disk_size_gb = 10  # Small disk for git server
        disk_init.source_image = "projects/debian-cloud/global/images/family/debian-11"
        disk.initialize_params = disk_init
        instance.disks = [disk]

        # Network interface
        # NOTE(review): no subnetwork is specified — this works for auto-mode
        # VPCs; custom-mode networks may require an explicit subnetwork. Confirm.
        network_interface = compute_v1.NetworkInterface()
        if self._network:
            network_interface.network = f"projects/{self._project_id}/global/networks/{self._network}"
        else:
            network_interface.network = f"projects/{self._project_id}/global/networks/default"

        # External IP for SSH access (and git access if no VPC)
        access_config = compute_v1.AccessConfig()
        access_config.name = "External NAT"
        access_config.type_ = "ONE_TO_ONE_NAT"
        network_interface.access_configs = [access_config]
        instance.network_interfaces = [network_interface]

        # Service account with minimal scopes
        service_account = compute_v1.ServiceAccount()
        service_account.email = "default"
        service_account.scopes = [
            "https://www.googleapis.com/auth/logging.write",
        ]
        instance.service_accounts = [service_account]

        # Metadata with startup script
        metadata = compute_v1.Metadata()
        metadata.items = [
            compute_v1.Items(key="startup-script", value=startup_script),
        ]
        instance.metadata = metadata

        # Tags for firewall rules — must match target_tags in
        # _create_firewall_rule(). (The "cooperbench-git-" prefix is duplicated
        # since vm_name already carries it; harmless, but keep both in sync.)
        tags = compute_v1.Tags()
        tags.items = [f"cooperbench-git-{self._vm_name}"]
        instance.tags = tags

        # Create the instance
        request = compute_v1.InsertInstanceRequest()
        request.project = self._project_id
        request.zone = self._zone
        request.instance_resource = instance

        operation = client.insert(request=request)

        # Mark VM as created immediately after insert succeeds
        # This ensures cleanup() will delete the VM even if _wait_for_operation times out
        self._vm_created = True

        # Wait for operation to complete
        self._wait_for_operation(operation.name)
        self._logger.debug(f"VM {self._vm_name} created successfully")

    def _wait_for_operation(self, operation_name: str, timeout: int = 300):
        """Wait for a zone operation to complete (polls every 2s)."""
        from google.cloud import compute_v1

        client = compute_v1.ZoneOperationsClient()
        start_time = time.time()

        while time.time() - start_time < timeout:
            operation = client.get(
                project=self._project_id,
                zone=self._zone,
                operation=operation_name,
            )

            if operation.status == compute_v1.Operation.Status.DONE:
                if operation.error:
                    errors = [e.message for e in operation.error.errors]
                    raise RuntimeError(f"VM operation failed: {errors}")
                return

            time.sleep(2)

        raise TimeoutError(f"Operation {operation_name} timed out after {timeout}s")

    def _wait_for_global_operation(self, operation_name: str, timeout: int = 300):
        """Wait for a global operation to complete (polls every 2s)."""
        from google.cloud import compute_v1

        client = compute_v1.GlobalOperationsClient()
        start_time = time.time()

        while time.time() - start_time < timeout:
            operation = client.get(
                project=self._project_id,
                operation=operation_name,
            )

            if operation.status == compute_v1.Operation.Status.DONE:
                if operation.error:
                    errors = [e.message for e in operation.error.errors]
                    raise RuntimeError(f"Global operation failed: {errors}")
                return

            time.sleep(2)

        raise TimeoutError(f"Operation {operation_name} timed out after {timeout}s")

    def _wait_for_ssh(self, timeout: int = 180):
        """Wait for SSH to become available on the VM.

        Also waits for the startup script's git-daemon via _wait_for_git_daemon
        before returning.
        """
        self._logger.debug(f"Waiting for SSH on {self._vm_name}...")
        start_time = time.time()

        while time.time() - start_time < timeout:
            try:
                # Try a simple SSH command
                result = subprocess.run(
                    [
                        "gcloud",
                        "compute",
                        "ssh",
                        self._vm_name,
                        f"--zone={self._zone}",
                        f"--project={self._project_id}",
                        "--command=echo ready",
                        "--quiet",
                        "--strict-host-key-checking=no",
                    ],
                    capture_output=True,
                    text=True,
                    timeout=30,
                )
                if result.returncode == 0 and "ready" in result.stdout:
                    self._logger.debug("SSH is ready")
                    # Wait a bit more for startup script to complete
                    self._wait_for_git_daemon()
                    return
            except subprocess.TimeoutExpired:
                pass
            except Exception as e:
                self._logger.debug(f"SSH not ready yet: {e}")

            time.sleep(5)

        raise TimeoutError(f"SSH not available on {self._vm_name} after {timeout}s")

    def _wait_for_git_daemon(self, timeout: int = 120):
        """Wait for git-daemon to start on the VM (checked via pgrep over SSH)."""
        self._logger.debug("Waiting for git-daemon to start...")
        start_time = time.time()

        while time.time() - start_time < timeout:
            try:
                result = subprocess.run(
                    [
                        "gcloud",
                        "compute",
                        "ssh",
                        self._vm_name,
                        f"--zone={self._zone}",
                        f"--project={self._project_id}",
                        "--command=pgrep -f git-daemon",
                        "--quiet",
                        "--strict-host-key-checking=no",
                    ],
                    capture_output=True,
                    text=True,
                    timeout=30,
                )
                if result.returncode == 0:
                    self._logger.debug("git-daemon is running")
                    return
            except Exception as e:
                self._logger.debug(f"git-daemon not ready yet: {e}")

            time.sleep(5)

        raise TimeoutError(f"git-daemon not started on {self._vm_name} after {timeout}s")

    def _create_firewall_rule(self):
        """Create firewall rule to allow git protocol traffic."""
        from google.cloud import compute_v1

        firewall_name = f"cooperbench-git-{self._vm_name}"
        self._logger.debug(f"Creating firewall rule {firewall_name}")

        client = compute_v1.FirewallsClient()

        firewall = compute_v1.Firewall()
        firewall.name = firewall_name

        # Use specified VPC or default network
        network_name = self._network or "default"
        firewall.network = f"projects/{self._project_id}/global/networks/{network_name}"

        # Allow TCP port 9418 (git daemon)
        # ("I_p_protocol" is the generated proto attribute name for IPProtocol)
        allowed = compute_v1.Allowed()
        allowed.I_p_protocol = "tcp"
        allowed.ports = ["9418"]
        firewall.allowed = [allowed]

        # Source ranges: internal only for VPC, anywhere for external mode
        if self._network:
            firewall.source_ranges = ["10.0.0.0/8"]
        else:
            firewall.source_ranges = ["0.0.0.0/0"]

        # Target only this VM (tag set in _create_git_server_vm)
        firewall.target_tags = [f"cooperbench-git-{self._vm_name}"]

        request = compute_v1.InsertFirewallRequest()
        request.project = self._project_id
        request.firewall_resource = firewall

        operation = client.insert(request=request)

        # Mark firewall as created immediately
        self._firewall_created = True

        # Wait for operation
        self._wait_for_global_operation(operation.name)
        self._logger.debug(f"Firewall rule {firewall_name} created")

    def _get_vm_ip(self, use_internal: bool = False) -> str:
        """Get the IP address of the VM.

        Args:
            use_internal: If True, return internal IP (for VPC). Otherwise external.

        Returns:
            IP address string
        """
        from google.cloud import compute_v1

        client = self._get_compute_client()

        request = compute_v1.GetInstanceRequest()
        request.project = self._project_id
        request.zone = self._zone
        request.instance = self._vm_name

        instance = client.get(request=request)

        if not instance.network_interfaces:
            raise RuntimeError(f"VM {self._vm_name} has no network interfaces")

        network_interface = instance.network_interfaces[0]

        if use_internal:
            # Return internal IP ("network_i_p" is the generated proto field)
            return network_interface.network_i_p
        else:
            # Return external IP ("nat_i_p" is the generated proto field)
            if not network_interface.access_configs:
                raise RuntimeError(f"VM {self._vm_name} has no external IP")
            return network_interface.access_configs[0].nat_i_p

    @property
    def url(self) -> str:
        """Git URL for agents to use as remote.

        Returns:
            Git URL for the repository (git://IP:9418/repo.git)
        """
        return self._git_url

    @property
    def network_name(self) -> str | None:
        """VPC network name for agents to join.

        Returns:
            Network name or None if using external IP
        """
        return self._network

    def cleanup(self) -> None:
        """Delete the VM and firewall rule."""
        # Delete firewall rule first
        if self._firewall_created:
            self._delete_firewall_rule()

        # Delete VM
        if self._vm_created:
            self._delete_vm()

    def _delete_firewall_rule(self):
        """Delete the firewall rule (best-effort; failures are logged)."""
        from google.cloud import compute_v1

        firewall_name = f"cooperbench-git-{self._vm_name}"
        self._logger.debug(f"Deleting firewall rule {firewall_name}")

        try:
            client = compute_v1.FirewallsClient()

            request = compute_v1.DeleteFirewallRequest()
            request.project = self._project_id
            request.firewall = firewall_name

            operation = client.delete(request=request)

            try:
                self._wait_for_global_operation(operation.name, timeout=120)
            except TimeoutError:
                self._logger.warning(f"Firewall deletion timed out: {firewall_name}")

            self._firewall_created = False
            self._logger.debug(f"Firewall rule {firewall_name} deleted")
        except Exception as e:
            self._logger.warning(f"Failed to delete firewall rule {firewall_name}: {e}")

    def _delete_vm(self):
        """Delete the VM (best-effort; failures are logged)."""
        self._logger.debug(f"Deleting VM {self._vm_name}")

        try:
            from google.cloud import compute_v1

            client = self._get_compute_client()

            request = compute_v1.DeleteInstanceRequest()
            request.project = self._project_id
            request.zone = self._zone
            request.instance = self._vm_name

            operation = client.delete(request=request)

            try:
                self._wait_for_operation(operation.name, timeout=120)
            except TimeoutError:
                # VM deletion initiated, will complete asynchronously
                pass

            self._vm_created = False
            self._logger.debug(f"VM {self._vm_name} deleted")
        except Exception as e:
            self._logger.warning(f"Failed to delete VM {self._vm_name}: {e}")

    def __del__(self):
        """Ensure cleanup on garbage collection.

        Best-effort: exceptions must never propagate out of GC.
        """
        try:
            self.cleanup()
        except Exception:
            pass
diff --git a/src/cooperbench/agents/mini_swe_agent_v2/connectors/git_servers/modal.py b/src/cooperbench/agents/mini_swe_agent_v2/connectors/git_servers/modal.py
new file mode 100644
index 0000000..fc8fc74
--- /dev/null
+++ b/src/cooperbench/agents/mini_swe_agent_v2/connectors/git_servers/modal.py
@@ -0,0 +1,139 @@
+"""Modal-based git server for code collaboration."""
+
+from __future__ import annotations
+
+import logging
+import time
+
+import modal
+
+
class ModalGitServer:
    """Shared git server sandbox for code collaboration using Modal.

    Creates a Modal sandbox running git-daemon that agents can push/pull to,
    reachable through the sandbox's unencrypted TCP tunnel on port 9418.
    """

    def __init__(self, sandbox: modal.Sandbox, hostname: str):
        """Initialize with an existing sandbox.

        Use ModalGitServer.create() to create a new server.

        Args:
            sandbox: Running Modal sandbox hosting git-daemon
            hostname: "host:port" of the unencrypted tunnel endpoint
        """
        self._sandbox = sandbox
        self._hostname = hostname
        self._logger = logging.getLogger("cooperbench.agents.mini_swe_agent_v2.git_server.modal")

    @classmethod
    def create(
        cls,
        app: modal.App,
        run_id: str,
        timeout: int = 3600,
    ) -> ModalGitServer:
        """Create and start a git server sandbox.

        Args:
            app: Modal app to create sandbox in
            run_id: Unique run identifier (for logging)
            timeout: Sandbox timeout in seconds

        Returns:
            ModalGitServer instance ready to accept connections

        Raises:
            RuntimeError: If repo init fails or no tunnel is available for
                port 9418. The sandbox is terminated before raising so it does
                not keep running (and billing) until its timeout.
        """
        logger = logging.getLogger("cooperbench.agents.mini_swe_agent_v2.git_server.modal")
        logger.debug(f"Creating git server for run {run_id}")

        # Image with git
        image = modal.Image.debian_slim().run_commands(
            "apt-get update && apt-get install -y git",
        )

        # Create sandbox with port 9418 exposed for git daemon (unencrypted TCP)
        sandbox = modal.Sandbox.create(
            image=image,
            app=app,
            timeout=timeout,
            unencrypted_ports=[9418],  # Expose git daemon port via TCP tunnel
        )

        # BUGFIX: everything after sandbox creation is wrapped so that a setup
        # failure terminates the sandbox instead of leaking it until timeout.
        try:
            # Initialize bare repo in /git/repo.git
            proc = sandbox.exec(
                "bash",
                "-c",
                """
                set -e
                mkdir -p /git/repo.git
                cd /git/repo.git
                git init --bare
                git config receive.denyCurrentBranch ignore
                touch git-daemon-export-ok
                """,
            )
            proc.wait()
            if proc.returncode != 0:
                raise RuntimeError(f"Failed to init git repo: {proc.stderr.read()}")

            # Start git daemon in background
            # --enable=receive-pack allows pushing
            # --export-all exports all repos
            # --base-path=/git means URL /repo.git maps to /git/repo.git
            # --listen=0.0.0.0 to accept connections from tunnel
            proc = sandbox.exec(
                "bash",
                "-c",
                """
                git daemon \
                    --reuseaddr \
                    --export-all \
                    --enable=receive-pack \
                    --base-path=/git \
                    --listen=0.0.0.0 \
                    /git &

                # Wait for daemon to start
                sleep 1
                echo "Git daemon started"
                """,
            )
            proc.stdout.read()
            proc.wait()

            # Give daemon time to fully initialize
            time.sleep(1)

            # Get the tunnel URL for port 9418
            tunnels = sandbox.tunnels()

            if tunnels and 9418 in tunnels:
                tunnel = tunnels[9418]
                # Use the unencrypted endpoint for git protocol
                # Tunnel has: host, port (encrypted), unencrypted_host, unencrypted_port
                hostname = f"{tunnel.unencrypted_host}:{tunnel.unencrypted_port}"
                logger.debug(f"Using unencrypted tunnel: {hostname}")
            else:
                raise RuntimeError(f"Failed to get tunnel for port 9418. Available tunnels: {tunnels}")
        except BaseException:
            # Best-effort teardown; the original failure is re-raised.
            try:
                sandbox.terminate()
            except Exception:
                pass
            raise

        logger.debug(f"Git server ready at git://{hostname}")

        return cls(sandbox=sandbox, hostname=hostname)

    @property
    def url(self) -> str:
        """Git URL for agents to use as remote.

        Returns:
            Git URL for the repository (git://hostname/repo.git)
        """
        return f"git://{self._hostname}/repo.git"

    def cleanup(self) -> None:
        """Terminate the git server sandbox (best-effort)."""
        if self._sandbox:
            try:
                self._sandbox.terminate()
            except Exception:
                pass
+
+
# Backwards-compatibility alias so existing imports of `GitServer` from this
# module keep working; new code should use the explicit `ModalGitServer` name
# (the backend-agnostic protocol of the same name lives in .base).
GitServer = ModalGitServer
diff --git a/src/cooperbench/agents/mini_swe_agent_v2/connectors/messaging.py b/src/cooperbench/agents/mini_swe_agent_v2/connectors/messaging.py
new file mode 100644
index 0000000..59dd1c3
--- /dev/null
+++ b/src/cooperbench/agents/mini_swe_agent_v2/connectors/messaging.py
@@ -0,0 +1,113 @@
+"""Redis-based mailbox messaging between agents.
+
+Provides simple send/receive messaging via Redis lists. Each agent has an inbox
+that other agents can push messages to.
+
+Example:
+ connector = MessagingConnector(
+ agent_id="agent1",
+ agents=["agent1", "agent2"],
+ url="redis://localhost:6379#run:abc123"
+ )
+
+ # Send to specific agent
+ connector.send("agent2", "I found a bug in auth.py")
+
+ # Receive pending messages
+ messages = connector.receive()
+
+ # Broadcast to all
+ connector.broadcast("I'm starting on the API changes")
+"""
+
+import json
+from datetime import datetime
+from typing import Any
+
+import redis
+
+
class MessagingConnector:
    """Redis-backed mailbox messaging between collaborating agents.

    Every agent owns one Redis list (its inbox); peers push JSON-encoded
    messages onto it and the owner drains it with receive().
    """

    def __init__(self, agent_id: str, agents: list[str], url: str = "redis://localhost:6379"):
        """Set up the connector and clear this agent's inbox.

        Args:
            agent_id: This agent's unique identifier (e.g., "agent1")
            agents: List of all agent IDs in the collaboration
            url: Redis URL. Supports namespacing via #prefix (e.g., "redis://host:6379#run:abc")
        """
        self.agent_id = agent_id
        self.agents = agents

        # A "#namespace" fragment on the URL prefixes all keys for this run.
        base, sep, namespace = url.partition("#")
        self._prefix = f"{namespace}:" if sep else ""

        self._client = redis.from_url(base)
        self._inbox_key = f"{self._prefix}{agent_id}:inbox"

        # Drop any stale messages left over from a previous run.
        self._client.delete(self._inbox_key)

    def setup(self, env: Any) -> None:
        """No-op: messaging is pure Redis and needs no sandbox configuration.

        Kept for interface parity with connectors that do configure the
        agent's environment.

        Args:
            env: The agent's environment (unused)
        """
        return None

    def send(self, recipient: str, content: str) -> None:
        """Push a message onto another agent's inbox.

        Args:
            recipient: Target agent's ID
            content: Message content
        """
        payload = json.dumps(
            {
                "from": self.agent_id,
                "to": recipient,
                "content": content,
                "timestamp": datetime.now().isoformat(),
            }
        )
        self._client.rpush(f"{self._prefix}{recipient}:inbox", payload)

    def receive(self) -> list[dict]:
        """Drain and return every pending message from this agent's inbox.

        Returns:
            List of message dicts with from, to, content, timestamp
        """
        drained: list[dict] = []
        while (raw := self._client.lpop(self._inbox_key)) is not None:
            drained.append(json.loads(raw))
        return drained

    def broadcast(self, content: str) -> None:
        """Send a message to every other agent in the collaboration.

        Args:
            content: Message content
        """
        for peer in self.agents:
            if peer == self.agent_id:
                continue
            self.send(peer, content)

    def peek(self) -> int:
        """Count pending messages without removing them.

        Returns:
            Number of pending messages
        """
        return self._client.llen(self._inbox_key)
diff --git a/src/cooperbench/agents/mini_swe_agent_v2/environments/__init__.py b/src/cooperbench/agents/mini_swe_agent_v2/environments/__init__.py
new file mode 100644
index 0000000..2e22b4e
--- /dev/null
+++ b/src/cooperbench/agents/mini_swe_agent_v2/environments/__init__.py
@@ -0,0 +1 @@
+"""Environment implementations for mini-SWE-agent v2."""
diff --git a/src/cooperbench/agents/mini_swe_agent_v2/environments/docker.py b/src/cooperbench/agents/mini_swe_agent_v2/environments/docker.py
new file mode 100644
index 0000000..f651783
--- /dev/null
+++ b/src/cooperbench/agents/mini_swe_agent_v2/environments/docker.py
@@ -0,0 +1,161 @@
+import logging
+import os
+import platform
+import shlex
+import subprocess
+import uuid
+from typing import Any
+
+from pydantic import BaseModel
+
+from cooperbench.agents.mini_swe_agent_v2.exceptions import Submitted
+from cooperbench.agents.mini_swe_agent_v2.utils.serialize import recursive_merge
+
+
class DockerEnvironmentConfig(BaseModel):
    """Configuration for DockerEnvironment (validated by pydantic).

    Mutable defaults below are safe: pydantic deep-copies field defaults per
    model instance, unlike plain Python default arguments.
    """

    # Docker image to run (required — no default).
    image: str
    cwd: str = "/"
    """Working directory in which to execute commands."""
    env: dict[str, str] = {}
    """Environment variables to set in the container."""
    forward_env: list[str] = []
    """Environment variables to forward to the container.
    Variables are only forwarded if they are set in the host environment.
    In case of conflict with `env`, the `env` variables take precedence.
    """
    timeout: int = 30
    """Timeout for executing commands in the container."""
    executable: str = os.getenv("MSWEA_DOCKER_EXECUTABLE", "docker")
    """Path to the docker/container executable."""
    run_args: list[str] = ["--rm"]
    """Additional arguments to pass to the docker/container executable.
    Default is ["--rm"], which removes the container after it exits.
    """
    container_timeout: str = "2h"
    """Max duration to keep container running. Uses the same format as the sleep command."""
    pull_timeout: int = 120
    """Timeout in seconds for pulling images."""
    interpreter: list[str] = ["bash", "-lc"]
    """Interpreter to use to execute commands. Default is ["bash", "-lc"].
    The actual command will be appended as argument to this. Override this to e.g., modify shell flags
    (e.g., to remove the `-l` flag to disable login shell) or to use python instead of bash to interpret commands.
    """
+
+
class DockerEnvironment:
    def __init__(
        self,
        *,
        config_class: type = DockerEnvironmentConfig,
        logger: logging.Logger | None = None,
        **kwargs,
    ):
        """This class executes bash commands in a Docker container using direct docker commands.
        See `DockerEnvironmentConfig` for keyword arguments.
        """
        self.logger = logger or logging.getLogger("mini_swe_agent_v2.environment")
        # Assigned before config parsing so cleanup()/__del__ remain safe even
        # if config validation or container startup fails part-way.
        self.container_id: str | None = None
        self.config = config_class(**kwargs)
        self._start_container()

    def get_template_vars(self, **kwargs) -> dict[str, Any]:
        """Template variables: config fields, host platform info, and caller kwargs.

        Merged via `recursive_merge` — presumably later sources win on
        conflict; confirm in utils.serialize.
        """
        return recursive_merge(self.config.model_dump(), platform.uname()._asdict(), kwargs)

    def serialize(self) -> dict:
        """Return a JSON-serializable description of this environment's config."""
        return {
            "info": {
                "config": {
                    "environment": self.config.model_dump(mode="json"),
                    "environment_type": f"{self.__class__.__module__}.{self.__class__.__name__}",
                }
            }
        }

    def _start_container(self):
        """Start a detached container and record its ID in self.container_id.

        The container just sleeps for `container_timeout`; commands are later
        injected with `docker exec` (see execute()).
        """
        container_name = f"minisweagent-{uuid.uuid4().hex[:8]}"
        cmd = [
            self.config.executable,
            "run",
            "-d",
            "--name",
            container_name,
            "-w",
            self.config.cwd,
            *self.config.run_args,
            self.config.image,
            "sleep",
            self.config.container_timeout,
        ]
        self.logger.debug(f"Starting container with command: {shlex.join(cmd)}")
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=self.config.pull_timeout,  # docker pull might take a while
            check=True,
        )
        self.logger.info(f"Started container {container_name} with ID {result.stdout.strip()}")
        self.container_id = result.stdout.strip()

    def execute(self, action: dict, cwd: str = "", *, timeout: int | None = None) -> dict[str, Any]:
        """Execute a command in the Docker container and return the result as a dict.

        Args:
            action: Dict with a "command" key holding the command string.
            cwd: Working directory for the command (defaults to config.cwd).
            timeout: Per-command timeout in seconds (defaults to config.timeout).

        Returns:
            Dict with "output" (stdout and stderr interleaved), "returncode"
            (-1 if an exception such as a timeout occurred) and
            "exception_info" (empty string on success).

        Raises:
            Submitted: If the output starts with the submission marker
                (see _check_finished).
        """
        command = action.get("command", "")
        cwd = cwd or self.config.cwd
        assert self.container_id, "Container not started"

        cmd = [self.config.executable, "exec", "-w", cwd]
        # forward_env first, then env — so `env` entries appear later on the
        # command line (env takes precedence, per the config docstring).
        for key in self.config.forward_env:
            if (value := os.getenv(key)) is not None:
                cmd.extend(["-e", f"{key}={value}"])
        for key, value in self.config.env.items():
            cmd.extend(["-e", f"{key}={value}"])
        cmd.extend([self.container_id, *self.config.interpreter, command])

        try:
            result = subprocess.run(
                cmd,
                text=True,
                timeout=timeout or self.config.timeout,
                encoding="utf-8",
                errors="replace",
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
            )
            output = {"output": result.stdout, "returncode": result.returncode, "exception_info": ""}
        except Exception as e:
            # Timeouts and other failures are reported in-band (returncode -1)
            # rather than raised, preserving whatever partial output exists.
            raw_output = getattr(e, "output", None)
            raw_output = (
                raw_output.decode("utf-8", errors="replace") if isinstance(raw_output, bytes) else (raw_output or "")
            )
            output = {
                "output": raw_output,
                "returncode": -1,
                "exception_info": f"An error occurred while executing the command: {e}",
                "extra": {"exception_type": type(e).__name__, "exception": str(e)},
            }
        self._check_finished(output)
        return output

    def _check_finished(self, output: dict):
        """Raises Submitted if the output indicates task completion.

        Convention: a successful command whose first output line is the marker
        COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT submits all remaining lines.
        """
        lines = output.get("output", "").lstrip().splitlines(keepends=True)
        if lines and lines[0].strip() == "COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT" and output["returncode"] == 0:
            submission = "".join(lines[1:])
            raise Submitted(
                {
                    "role": "exit",
                    "content": submission,
                    "extra": {"exit_status": "Submitted", "submission": submission},
                }
            )

    def cleanup(self):
        """Stop and remove the Docker container (asynchronous, best-effort)."""
        if getattr(self, "container_id", None) is not None:  # if init fails early, container_id might not be set
            # Fire-and-forget background shell so interpreter shutdown is not
            # blocked waiting for docker.
            cmd = f"(timeout 60 {self.config.executable} stop {self.container_id} || {self.config.executable} rm -f {self.container_id}) >/dev/null 2>&1 &"
            subprocess.Popen(cmd, shell=True)

    def __del__(self):
        """Cleanup container when object is destroyed."""
        self.cleanup()
diff --git a/src/cooperbench/agents/mini_swe_agent_v2/environments/modal.py b/src/cooperbench/agents/mini_swe_agent_v2/environments/modal.py
new file mode 100644
index 0000000..ab43d44
--- /dev/null
+++ b/src/cooperbench/agents/mini_swe_agent_v2/environments/modal.py
@@ -0,0 +1,264 @@
+"""Modal Sandbox environment for cloud execution (v2 interface)."""
+
+import logging
+import platform
+import threading
+import time
+from typing import Any
+
+import modal
+from pydantic import BaseModel
+
+# Retryable error patterns
+_RETRYABLE_PATTERNS = [
+ "Image build",
+ "UNAVAILABLE",
+ "DEADLINE_EXCEEDED",
+ "INTERNAL",
+ "temporarily unavailable",
+ "rate limit",
+ "ClientClosed",
+ "NOT_FOUND",
+ "Sandbox not found",
+ "already shut down",
+ "Container ID",
+ "finished",
+]
+
+# Global thread-safe image cache to prevent duplicate builds
+_image_cache: dict[str, modal.Image] = {}
+_image_locks: dict[str, threading.Lock] = {}
+_cache_lock = threading.Lock()
+
+# Global Modal app (shared across all environments)
+_global_app: modal.App | None = None
+_app_lock = threading.Lock()
+
+
+def _get_global_app() -> modal.App:
+ """Get or create the global Modal app (thread-safe)."""
+ global _global_app
+ with _app_lock:
+ if _global_app is None:
+ _global_app = modal.App.lookup("cooperbench", create_if_missing=True)
+ return _global_app
+
+
+def _reset_global_app() -> None:
+ """Reset the global app (e.g., after ClientClosed errors)."""
+ global _global_app
+ with _app_lock:
+ _global_app = None
+
+
+def _get_or_build_image(image_name: str) -> modal.Image:
+ """Get cached image or build it (thread-safe, only one build per image)."""
+ if image_name in _image_cache:
+ return _image_cache[image_name]
+
+ with _cache_lock:
+ if image_name not in _image_locks:
+ _image_locks[image_name] = threading.Lock()
+ lock = _image_locks[image_name]
+
+ with lock:
+ if image_name in _image_cache:
+ return _image_cache[image_name]
+
+ image = modal.Image.from_registry(image_name).entrypoint([])
+ _image_cache[image_name] = image
+ return image
+
+
+def _invalidate_image(image_name: str) -> None:
+ """Remove an image from cache (e.g., after build failure)."""
+ with _cache_lock:
+ _image_cache.pop(image_name, None)
+
+
+class ModalEnvironmentConfig(BaseModel):
+ image: str
+ cwd: str = "/"
+ timeout: int = 3600
+ env: dict[str, str] = {}
+ max_retries: int = 5
+ retry_delay: float = 5.0
+
+
+class ModalEnvironment:
+ sb: modal.Sandbox | None
+
+ def __init__(
+ self,
+ *,
+ config_class: type = ModalEnvironmentConfig,
+ logger: logging.Logger | None = None,
+ **kwargs,
+ ):
+ self.logger = logger or logging.getLogger("cooperbench.agents.mini_swe_agent_v2.modal")
+ self.config = config_class(**kwargs)
+ self.sb = None
+ self._start_sandbox_with_retry()
+
+ def _reset_client(self):
+ """Reset Modal client state for fresh connection."""
+ _reset_global_app()
+ _invalidate_image(self.config.image)
+
+ def _build_image(self):
+ """Build the Modal image (globally cached, thread-safe)."""
+ return _get_or_build_image(self.config.image)
+
+ def _is_retryable(self, error: Exception) -> bool:
+ """Check if error is retryable."""
+ error_str = str(error) + str(type(error).__name__)
+ return any(p in error_str for p in _RETRYABLE_PATTERNS)
+
+ def _start_sandbox(self):
+ """Create and start the Modal Sandbox (single attempt)."""
+ self.logger.debug(f"Creating Modal Sandbox with image: {self.config.image}")
+ image = self._build_image()
+ self.sb = modal.Sandbox.create(
+ image=image,
+ timeout=self.config.timeout,
+ workdir=self.config.cwd,
+ app=_get_global_app(),
+ )
+ self.logger.debug(f"Sandbox created: {self.sb.object_id}")
+
+ def _start_sandbox_with_retry(self):
+ """Create sandbox with retry logic for transient failures."""
+ last_error = None
+ for attempt in range(self.config.max_retries):
+ try:
+ self._start_sandbox()
+ return
+ except Exception as e:
+ last_error = e
+ error_str = str(e) + str(type(e).__name__)
+
+ if attempt < self.config.max_retries - 1 and self._is_retryable(e):
+ delay = self.config.retry_delay * (2**attempt)
+ self.logger.warning(
+ f"Sandbox creation failed (attempt {attempt + 1}/{self.config.max_retries}): {e}. "
+ f"Retrying in {delay:.1f}s..."
+ )
+ time.sleep(delay)
+ if "ClientClosed" in error_str or "Image build" in error_str:
+ self._reset_client()
+ else:
+ break
+
+ raise last_error
+
+ def _is_sandbox_dead(self, error: Exception) -> bool:
+ """Check if the error indicates the sandbox has terminated."""
+ error_str = str(error) + str(type(error).__name__)
+ return any(
+ x in error_str
+ for x in [
+ "NOT_FOUND",
+ "Sandbox not found",
+ "already shut down",
+ "Container ID",
+ "finished",
+ "ClientClosed",
+ ]
+ )
+
+ def _reconnect_sandbox(self):
+ """Attempt to create a new sandbox after the old one died."""
+ self.logger.warning("Sandbox terminated unexpectedly, creating new sandbox...")
+ old_sb = self.sb
+ self.sb = None
+ if old_sb:
+ try:
+ old_sb.terminate()
+ except Exception:
+ pass
+ self._reset_client()
+ self._start_sandbox_with_retry()
+
+ def get_template_vars(self, **kwargs) -> dict[str, Any]:
+ return self.config.model_dump() | {
+ "system": "Linux",
+ "release": "modal",
+ "version": "",
+ "machine": platform.machine(),
+ }
+
+ def serialize(self) -> dict:
+ return {
+ "info": {
+ "config": {
+ "environment": self.config.model_dump(mode="json"),
+ "environment_type": f"{self.__class__.__module__}.{self.__class__.__name__}",
+ }
+ }
+ }
+
+ def execute(self, action: dict, cwd: str = "", *, timeout: int | None = None) -> dict[str, Any]:
+ """Execute a command in the Modal Sandbox with retry on sandbox death.
+
+ v2 interface: action is a dict with {"command": "..."}.
+ """
+ command = action.get("command", "")
+ cwd = cwd or self.config.cwd
+ last_error: Exception | None = None
+
+ for attempt in range(self.config.max_retries):
+ try:
+ if self.sb is None:
+ raise RuntimeError("Sandbox not initialized")
+                proc = self.sb.exec("bash", "-lc", f'cd "{cwd}" && {command}')
+ stdout = proc.stdout.read()
+ stderr = proc.stderr.read()
+ proc.wait()
+ output = stdout + stderr if stderr else stdout
+ result = {"output": output, "returncode": proc.returncode, "exception_info": ""}
+ self._check_finished(result)
+ return result
+ except Exception as e:
+ # Re-raise Submitted exceptions (task completion)
+ from cooperbench.agents.mini_swe_agent_v2.exceptions import Submitted
+
+ if isinstance(e, Submitted):
+ raise
+ last_error = e
+ if self._is_sandbox_dead(e) and attempt < self.config.max_retries - 1:
+ self.logger.warning(
+ f"Sandbox died during execution (attempt {attempt + 1}/{self.config.max_retries}): {e}"
+ )
+ self._reconnect_sandbox()
+ else:
+ raise
+
+ if last_error is not None:
+ raise last_error
+ raise RuntimeError("No retries attempted")
+
+ def _check_finished(self, output: dict):
+ """Raises Submitted if the output indicates task completion."""
+ from cooperbench.agents.mini_swe_agent_v2.exceptions import Submitted
+
+ lines = output.get("output", "").lstrip().splitlines(keepends=True)
+ if lines and lines[0].strip() == "COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT" and output["returncode"] == 0:
+ submission = "".join(lines[1:])
+ raise Submitted(
+ {
+ "role": "exit",
+ "content": submission,
+ "extra": {"exit_status": "Submitted", "submission": submission},
+ }
+ )
+
+ def cleanup(self):
+ """Terminate the Modal Sandbox."""
+ if hasattr(self, "sb") and self.sb:
+ try:
+ self.sb.terminate()
+ except Exception:
+ pass
+
+ def __del__(self):
+ self.cleanup()
diff --git a/src/cooperbench/agents/mini_swe_agent_v2/exceptions.py b/src/cooperbench/agents/mini_swe_agent_v2/exceptions.py
new file mode 100644
index 0000000..0295d7e
--- /dev/null
+++ b/src/cooperbench/agents/mini_swe_agent_v2/exceptions.py
@@ -0,0 +1,22 @@
+class InterruptAgentFlow(Exception):
+ """Raised to interrupt the agent flow and add messages."""
+
+ def __init__(self, *messages: dict):
+ self.messages = messages
+ super().__init__()
+
+
+class Submitted(InterruptAgentFlow):
+ """Raised when the agent has completed its task."""
+
+
+class LimitsExceeded(InterruptAgentFlow):
+ """Raised when the agent has exceeded its cost or step limit."""
+
+
+class UserInterruption(InterruptAgentFlow):
+ """Raised when the user interrupts the agent."""
+
+
+class FormatError(InterruptAgentFlow):
+ """Raised when the LM's output is not in the expected format."""
diff --git a/src/cooperbench/agents/mini_swe_agent_v2/models/__init__.py b/src/cooperbench/agents/mini_swe_agent_v2/models/__init__.py
new file mode 100644
index 0000000..70b1047
--- /dev/null
+++ b/src/cooperbench/agents/mini_swe_agent_v2/models/__init__.py
@@ -0,0 +1,34 @@
+"""Model implementations for mini-SWE-agent v2."""
+
+import os
+import threading
+
+
+class GlobalModelStats:
+ """Global model statistics tracker with optional limits."""
+
+ def __init__(self):
+ self._cost = 0.0
+ self._n_calls = 0
+ self._lock = threading.Lock()
+ self.cost_limit = float(os.getenv("MSWEA_GLOBAL_COST_LIMIT", "0"))
+ self.call_limit = int(os.getenv("MSWEA_GLOBAL_CALL_LIMIT", "0"))
+
+ def add(self, cost: float) -> None:
+ """Add a model call with its cost, checking limits."""
+ with self._lock:
+ self._cost += cost
+ self._n_calls += 1
+ if 0 < self.cost_limit < self._cost or 0 < self.call_limit < self._n_calls + 1:
+ raise RuntimeError(f"Global cost/call limit exceeded: ${self._cost:.4f} / {self._n_calls}")
+
+ @property
+ def cost(self) -> float:
+ return self._cost
+
+ @property
+ def n_calls(self) -> int:
+ return self._n_calls
+
+
+GLOBAL_MODEL_STATS = GlobalModelStats()
diff --git a/src/cooperbench/agents/mini_swe_agent_v2/models/litellm_model.py b/src/cooperbench/agents/mini_swe_agent_v2/models/litellm_model.py
new file mode 100644
index 0000000..fa9dad1
--- /dev/null
+++ b/src/cooperbench/agents/mini_swe_agent_v2/models/litellm_model.py
@@ -0,0 +1,149 @@
+import json
+import logging
+import os
+import time
+from collections.abc import Callable
+from pathlib import Path
+from typing import Any, Literal
+
+import litellm
+from pydantic import BaseModel
+
+from cooperbench.agents.mini_swe_agent_v2.models import GLOBAL_MODEL_STATS
+from cooperbench.agents.mini_swe_agent_v2.models.utils.actions_toolcall import (
+ BASH_TOOL,
+ SEND_MESSAGE_TOOL,
+ format_toolcall_observation_messages,
+ parse_toolcall_actions,
+)
+from cooperbench.agents.mini_swe_agent_v2.models.utils.anthropic_utils import _reorder_anthropic_thinking_blocks
+from cooperbench.agents.mini_swe_agent_v2.models.utils.cache_control import set_cache_control
+from cooperbench.agents.mini_swe_agent_v2.models.utils.openai_multimodal import expand_multimodal_content
+from cooperbench.agents.mini_swe_agent_v2.models.utils.retry import retry
+
+logger = logging.getLogger("litellm_model")
+
+
+class LitellmModelConfig(BaseModel):
+ model_name: str
+ """Model name. Highly recommended to include the provider in the model name, e.g., `anthropic/claude-sonnet-4-5-20250929`."""
+ model_kwargs: dict[str, Any] = {}
+ """Additional arguments passed to the API."""
+ litellm_model_registry: Path | str | None = os.getenv("LITELLM_MODEL_REGISTRY_PATH")
+ """Model registry for cost tracking and model metadata. See the local model guide (https://mini-swe-agent.com/latest/models/local_models/) for more details."""
+ set_cache_control: Literal["default_end"] | None = None
+ """Set explicit cache control markers, for example for Anthropic models"""
+ cost_tracking: Literal["default", "ignore_errors"] = os.getenv("MSWEA_COST_TRACKING", "default")
+ """Cost tracking mode for this model. Can be "default" or "ignore_errors" (ignore errors/missing cost info)"""
+ format_error_template: str = "{{ error }}"
+ """Template used when the LM's output is not in the expected format."""
+ observation_template: str = (
+ "{% if output.exception_info %}{{output.exception_info}}\n{% endif %}"
+ "{{output.returncode}}\n"
+ )
+ """Template used to render the observation after executing an action."""
+ multimodal_regex: str = ""
+ """Regex to extract multimodal content. Empty string disables multimodal processing."""
+
+
+class LitellmModel:
+ abort_exceptions: list[type[Exception]] = [
+ litellm.exceptions.UnsupportedParamsError,
+ litellm.exceptions.NotFoundError,
+ litellm.exceptions.PermissionDeniedError,
+ litellm.exceptions.ContextWindowExceededError,
+ litellm.exceptions.AuthenticationError,
+ KeyboardInterrupt,
+ ]
+
+ def __init__(self, *, config_class: Callable = LitellmModelConfig, extra_tools: list[dict] | None = None, **kwargs):
+ self.config = config_class(**kwargs)
+ self._tools = [BASH_TOOL] + (extra_tools or [])
+ if self.config.litellm_model_registry and Path(self.config.litellm_model_registry).is_file():
+ litellm.utils.register_model(json.loads(Path(self.config.litellm_model_registry).read_text()))
+
+ def _query(self, messages: list[dict[str, str]], **kwargs):
+ try:
+ return litellm.completion(
+ model=self.config.model_name,
+ messages=messages,
+ tools=self._tools,
+ **(self.config.model_kwargs | kwargs),
+ )
+ except litellm.exceptions.AuthenticationError as e:
+ e.message += " You can permanently set your API key with `mini-extra config set KEY VALUE`."
+ raise e
+
+ def _prepare_messages_for_api(self, messages: list[dict]) -> list[dict]:
+ prepared = [{k: v for k, v in msg.items() if k != "extra"} for msg in messages]
+ prepared = _reorder_anthropic_thinking_blocks(prepared)
+ return set_cache_control(prepared, mode=self.config.set_cache_control)
+
+ def query(self, messages: list[dict[str, str]], **kwargs) -> dict:
+ for attempt in retry(logger=logger, abort_exceptions=self.abort_exceptions):
+ with attempt:
+ response = self._query(self._prepare_messages_for_api(messages), **kwargs)
+ cost_output = self._calculate_cost(response)
+ GLOBAL_MODEL_STATS.add(cost_output["cost"])
+ message = response.choices[0].message.model_dump()
+ message["extra"] = {
+ "actions": self._parse_actions(response),
+ "response": response.model_dump(),
+ **cost_output,
+ "timestamp": time.time(),
+ }
+ return message
+
+ def _calculate_cost(self, response) -> dict[str, float]:
+ try:
+ cost = litellm.cost_calculator.completion_cost(response, model=self.config.model_name)
+ if cost <= 0.0:
+ raise ValueError(f"Cost must be > 0.0, got {cost}")
+ except Exception as e:
+ cost = 0.0
+ if self.config.cost_tracking != "ignore_errors":
+ msg = (
+ f"Error calculating cost for model {self.config.model_name}: {e}, perhaps it's not registered? "
+ "You can ignore this issue from your config file with cost_tracking: 'ignore_errors' or "
+ "globally with export MSWEA_COST_TRACKING='ignore_errors'. "
+ "Alternatively check the 'Cost tracking' section in the documentation at "
+ "https://klieret.short.gy/mini-local-models. "
+ " Still stuck? Please open a github issue at https://github.com/SWE-agent/mini-swe-agent/issues/new/choose!"
+ )
+ logger.critical(msg)
+ raise RuntimeError(msg) from e
+ return {"cost": cost}
+
+ def _parse_actions(self, response) -> list[dict]:
+ """Parse tool calls from the response. Raises FormatError if unknown tool."""
+ tool_calls = response.choices[0].message.tool_calls or []
+ return parse_toolcall_actions(tool_calls, format_error_template=self.config.format_error_template)
+
+ def format_message(self, **kwargs) -> dict:
+ return expand_multimodal_content(kwargs, pattern=self.config.multimodal_regex)
+
+ def format_observation_messages(
+ self, message: dict, outputs: list[dict], template_vars: dict | None = None
+ ) -> list[dict]:
+ """Format execution outputs into tool result messages."""
+ actions = message.get("extra", {}).get("actions", [])
+ return format_toolcall_observation_messages(
+ actions=actions,
+ outputs=outputs,
+ observation_template=self.config.observation_template,
+ template_vars=template_vars,
+ multimodal_regex=self.config.multimodal_regex,
+ )
+
+ def get_template_vars(self, **kwargs) -> dict[str, Any]:
+ return self.config.model_dump()
+
+ def serialize(self) -> dict:
+ return {
+ "info": {
+ "config": {
+ "model": self.config.model_dump(mode="json"),
+ "model_type": f"{self.__class__.__module__}.{self.__class__.__name__}",
+ },
+ }
+ }
diff --git a/src/cooperbench/agents/mini_swe_agent_v2/models/utils/__init__.py b/src/cooperbench/agents/mini_swe_agent_v2/models/utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/cooperbench/agents/mini_swe_agent_v2/models/utils/actions_toolcall.py b/src/cooperbench/agents/mini_swe_agent_v2/models/utils/actions_toolcall.py
new file mode 100644
index 0000000..47a8a34
--- /dev/null
+++ b/src/cooperbench/agents/mini_swe_agent_v2/models/utils/actions_toolcall.py
@@ -0,0 +1,137 @@
+"""Parse actions & format observations with toolcalls"""
+
+import json
+import time
+
+from jinja2 import StrictUndefined, Template
+
+from cooperbench.agents.mini_swe_agent_v2.exceptions import FormatError
+from cooperbench.agents.mini_swe_agent_v2.models.utils.openai_multimodal import expand_multimodal_content
+
+BASH_TOOL = {
+ "type": "function",
+ "function": {
+ "name": "bash",
+ "description": "Execute a bash command",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "command": {
+ "type": "string",
+ "description": "The bash command to execute",
+ }
+ },
+ "required": ["command"],
+ },
+ },
+}
+
+SEND_MESSAGE_TOOL = {
+ "type": "function",
+ "function": {
+ "name": "send_message",
+ "description": "Send a message to another agent for inter-agent communication",
+ "parameters": {
+ "type": "object",
+ "properties": {
+ "recipient": {
+ "type": "string",
+ "description": "The agent ID to send the message to",
+ },
+ "content": {
+ "type": "string",
+ "description": "The message content to send",
+ },
+ },
+ "required": ["recipient", "content"],
+ },
+ },
+}
+
+KNOWN_TOOLS = {"bash", "send_message"}
+
+
+def parse_toolcall_actions(tool_calls: list, *, format_error_template: str) -> list[dict]:
+ """Parse tool calls from the response. Raises FormatError if unknown tool or invalid args."""
+ if not tool_calls:
+ raise FormatError(
+ {
+ "role": "user",
+ "content": Template(format_error_template, undefined=StrictUndefined).render(
+ error="No tool calls found in the response. Every response MUST include at least one tool call."
+ ),
+ "extra": {"interrupt_type": "FormatError"},
+ }
+ )
+ actions = []
+ for tool_call in tool_calls:
+ error_msg = ""
+ args = {}
+ try:
+ args = json.loads(tool_call.function.arguments)
+ except Exception as e:
+ error_msg = f"Error parsing tool call arguments: {e}. "
+
+ tool_name = tool_call.function.name
+
+ if tool_name not in KNOWN_TOOLS:
+ error_msg += f"Unknown tool '{tool_name}'."
+ elif tool_name == "bash" and "command" not in args:
+ error_msg += "Missing 'command' argument in bash tool call."
+ elif tool_name == "send_message":
+ if "recipient" not in args:
+ error_msg += "Missing 'recipient' argument in send_message tool call."
+ if "content" not in args:
+ error_msg += "Missing 'content' argument in send_message tool call."
+
+ if error_msg:
+ raise FormatError(
+ {
+ "role": "user",
+ "content": Template(format_error_template, undefined=StrictUndefined).render(
+ error=error_msg.strip()
+ ),
+ "extra": {"interrupt_type": "FormatError"},
+ }
+ )
+
+ action = {"tool_name": tool_name, "tool_call_id": tool_call.id, **args}
+ actions.append(action)
+ return actions
+
+
+def format_toolcall_observation_messages(
+ *,
+ actions: list[dict],
+ outputs: list[dict],
+ observation_template: str,
+ template_vars: dict | None = None,
+ multimodal_regex: str = "",
+) -> list[dict]:
+ """Format execution outputs into tool result messages."""
+ not_executed = {"output": "", "returncode": -1, "exception_info": "action was not executed"}
+ padded_outputs = outputs + [not_executed] * (len(actions) - len(outputs))
+ results = []
+ for action, output in zip(actions, padded_outputs):
+ content = Template(observation_template, undefined=StrictUndefined).render(
+ output=output, **(template_vars or {})
+ )
+ msg = {
+ "content": content,
+ "extra": {
+ "raw_output": output.get("output", ""),
+ "returncode": output.get("returncode"),
+ "timestamp": time.time(),
+ "exception_info": output.get("exception_info"),
+ **output.get("extra", {}),
+ },
+ }
+ if "tool_call_id" in action:
+ msg["tool_call_id"] = action["tool_call_id"]
+ msg["role"] = "tool"
+ else:
+ msg["role"] = "user" # human issued commands
+ if multimodal_regex:
+ msg = expand_multimodal_content(msg, pattern=multimodal_regex)
+ results.append(msg)
+ return results
diff --git a/src/cooperbench/agents/mini_swe_agent_v2/models/utils/anthropic_utils.py b/src/cooperbench/agents/mini_swe_agent_v2/models/utils/anthropic_utils.py
new file mode 100644
index 0000000..6636df2
--- /dev/null
+++ b/src/cooperbench/agents/mini_swe_agent_v2/models/utils/anthropic_utils.py
@@ -0,0 +1,28 @@
+"""Utilities for Anthropic API compatibility."""
+
+
+def _is_anthropic_thinking_block(block) -> bool:
+ """Check if a content block is a thinking-type block."""
+ if not isinstance(block, dict):
+ return False
+ return block.get("type") in ("thinking", "redacted_thinking")
+
+
+def _reorder_anthropic_thinking_blocks(messages: list[dict]) -> list[dict]:
+    """Move thinking blocks to the front of assistant message content.
+
+    Anthropic requires thinking blocks to precede other content blocks; if a message contains only thinking blocks, an empty text block is appended so a thinking block is never last.
+    """
+ result = []
+ for msg in messages:
+ if msg.get("role") == "assistant" and isinstance(msg.get("content"), list):
+ content = msg["content"]
+ thinking_blocks = [b for b in content if _is_anthropic_thinking_block(b)]
+ if thinking_blocks:
+ other_blocks = [b for b in content if not _is_anthropic_thinking_block(b)]
+ if other_blocks:
+ msg = {**msg, "content": thinking_blocks + other_blocks}
+ else:
+ msg = {**msg, "content": thinking_blocks + [{"type": "text", "text": ""}]}
+ result.append(msg)
+ return result
diff --git a/src/cooperbench/agents/mini_swe_agent_v2/models/utils/cache_control.py b/src/cooperbench/agents/mini_swe_agent_v2/models/utils/cache_control.py
new file mode 100644
index 0000000..9c50ee0
--- /dev/null
+++ b/src/cooperbench/agents/mini_swe_agent_v2/models/utils/cache_control.py
@@ -0,0 +1,67 @@
+"""Cache control utilities are mostly for Anthropic models.
+They are used to explicitly set cache control points.
+"""
+
+import copy
+import warnings
+from typing import Literal
+
+
+def _get_content_text(entry: dict) -> str | None:
+ if entry["content"] is None:
+ return None
+ if isinstance(entry["content"], str):
+ return entry["content"]
+ assert len(entry["content"]) == 1, "Expected single message in content"
+ return entry["content"][0]["text"]
+
+
+def _clear_cache_control(entry: dict) -> None:
+ if isinstance(entry["content"], list):
+ assert len(entry["content"]) == 1, "Expected single message in content"
+ entry["content"][0].pop("cache_control", None)
+ # Note: entry["content"] can be None for assistant messages with only tool_use
+ entry.pop("cache_control", None)
+
+
+def _set_cache_control(entry: dict) -> None:
+ # Handle None content (e.g., assistant messages with only tool_use)
+ if entry["content"] is None:
+ entry["cache_control"] = {"type": "ephemeral"}
+ return
+
+ if not isinstance(entry["content"], list):
+ entry["content"] = [ # type: ignore
+ {
+ "type": "text",
+ "text": _get_content_text(entry),
+ "cache_control": {"type": "ephemeral"},
+ }
+ ]
+ else:
+ entry["content"][0]["cache_control"] = {"type": "ephemeral"}
+ if entry["role"] == "tool":
+            # Workaround: per-block cache_control on tool-result content is rejected by some providers, so drop it from the block and mark the whole message instead
+ entry["content"][0].pop("cache_control", None)
+ entry["cache_control"] = {"type": "ephemeral"}
+
+
+def set_cache_control(
+ messages: list[dict], *, mode: Literal["default_end"] | None = "default_end", last_n_messages_offset: int = 0
+) -> list[dict]:
+ """This messages processor adds manual cache control marks to the messages."""
+ if mode is None:
+ return messages
+ if mode != "default_end":
+ raise ValueError(f"Invalid mode: {mode}")
+ if last_n_messages_offset:
+ warnings.warn("last_n_messages_offset is deprecated and will be removed in the future. It has no effect.")
+
+ messages = copy.deepcopy(messages)
+ new_messages = []
+ for i_entry, entry in enumerate(reversed(messages)):
+ _clear_cache_control(entry)
+ if i_entry == 0:
+ _set_cache_control(entry)
+ new_messages.append(entry)
+ return list(reversed(new_messages))
diff --git a/src/cooperbench/agents/mini_swe_agent_v2/models/utils/openai_multimodal.py b/src/cooperbench/agents/mini_swe_agent_v2/models/utils/openai_multimodal.py
new file mode 100644
index 0000000..4f76fa5
--- /dev/null
+++ b/src/cooperbench/agents/mini_swe_agent_v2/models/utils/openai_multimodal.py
@@ -0,0 +1,50 @@
+"""Utilities for handling multimodal content in OpenAI-style messages."""
+
+import copy
+import re
+from typing import Any
+
+DEFAULT_MULTIMODAL_REGEX = (  # NOTE(review): the tag delimiters around the two groups (type, content) appear to have been stripped in transit;
+    r"(?s)(.+?)(.+?)"  # as written this pattern is ambiguous/pathological — restore the original tag markers before enabling multimodal parsing.
+)
+
+
+def _expand_content_string(*, content: str, pattern: str) -> list[dict]:
+ """Expand a content string, replacing multimodal tags with structured content."""
+ matches = list(re.finditer(pattern, content))
+ if not matches:
+ return [{"type": "text", "text": content}]
+ result = []
+ last_end = 0
+ for match in matches:
+ text_before = content[last_end : match.start()]
+ if text_before:
+ result.append({"type": "text", "text": text_before})
+ content_type = match.group(1).strip()
+ extracted = match.group(2).strip()
+ if content_type == "image_url":
+ result.append({"type": "image_url", "image_url": {"url": extracted}})
+ last_end = match.end()
+ text_after = content[last_end:]
+ if text_after:
+ result.append({"type": "text", "text": text_after})
+ return result
+
+
+def expand_multimodal_content(content: Any, *, pattern: str) -> Any:
+ """Recursively expand multimodal content in messages.
+ Note: Returns copy of content, original content is not modified.
+ """
+ if not pattern:
+ return content
+ content = copy.deepcopy(content)
+ if isinstance(content, str):
+ return _expand_content_string(content=content, pattern=pattern)
+ if isinstance(content, list):
+ return [expand_multimodal_content(item, pattern=pattern) for item in content]
+ if isinstance(content, dict):
+ if "content" not in content:
+ return content
+ content["content"] = expand_multimodal_content(content["content"], pattern=pattern)
+ return content
+ return str(content)
diff --git a/src/cooperbench/agents/mini_swe_agent_v2/models/utils/retry.py b/src/cooperbench/agents/mini_swe_agent_v2/models/utils/retry.py
new file mode 100644
index 0000000..055d4b6
--- /dev/null
+++ b/src/cooperbench/agents/mini_swe_agent_v2/models/utils/retry.py
@@ -0,0 +1,25 @@
+"""Retry utility for model queries."""
+
+import logging
+import os
+
+from tenacity import Retrying, before_sleep_log, retry_if_not_exception_type, stop_after_attempt, wait_exponential
+
+
+def retry(*, logger: logging.Logger, abort_exceptions: list[type[Exception]]) -> Retrying:
+ """Thin wrapper around tenacity.Retrying to make use of global config etc.
+
+ Args:
+ logger: Logger to use for reporting retries
+ abort_exceptions: Exceptions to abort on.
+
+ Returns:
+ A tenacity.Retrying object.
+ """
+ return Retrying(
+ reraise=True,
+ stop=stop_after_attempt(int(os.getenv("MSWEA_MODEL_RETRY_STOP_AFTER_ATTEMPT", "10"))),
+ wait=wait_exponential(multiplier=1, min=4, max=60),
+ before_sleep=before_sleep_log(logger, logging.WARNING),
+ retry=retry_if_not_exception_type(tuple(abort_exceptions)),
+ )
diff --git a/src/cooperbench/agents/mini_swe_agent_v2/utils/__init__.py b/src/cooperbench/agents/mini_swe_agent_v2/utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/cooperbench/agents/mini_swe_agent_v2/utils/log.py b/src/cooperbench/agents/mini_swe_agent_v2/utils/log.py
new file mode 100644
index 0000000..8069455
--- /dev/null
+++ b/src/cooperbench/agents/mini_swe_agent_v2/utils/log.py
@@ -0,0 +1,36 @@
+import logging
+from pathlib import Path
+
+from rich.logging import RichHandler
+
+
+def _setup_root_logger() -> None:
+ logger = logging.getLogger("mini_swe_agent_v2")
+ logger.setLevel(logging.DEBUG)
+ _handler = RichHandler(
+ show_path=False,
+ show_time=False,
+ show_level=False,
+ markup=True,
+ )
+ _formatter = logging.Formatter("%(name)s: %(levelname)s: %(message)s")
+ _handler.setFormatter(_formatter)
+ logger.addHandler(_handler)
+
+
+def add_file_handler(path: Path | str, level: int = logging.DEBUG, *, print_path: bool = True) -> None:
+ logger = logging.getLogger("mini_swe_agent_v2")
+ handler = logging.FileHandler(path)
+ handler.setLevel(level)
+ formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
+ handler.setFormatter(formatter)
+ logger.addHandler(handler)
+ if print_path:
+ print(f"Logging to '{path}'")
+
+
+_setup_root_logger()
+logger = logging.getLogger("mini_swe_agent_v2")
+
+
+__all__ = ["logger"]
diff --git a/src/cooperbench/agents/mini_swe_agent_v2/utils/serialize.py b/src/cooperbench/agents/mini_swe_agent_v2/utils/serialize.py
new file mode 100644
index 0000000..ccd5947
--- /dev/null
+++ b/src/cooperbench/agents/mini_swe_agent_v2/utils/serialize.py
@@ -0,0 +1,29 @@
+from typing import Any
+
+UNSET = object()
+
+
+def recursive_merge(*dictionaries: dict | None) -> dict:
+ """Merge multiple dictionaries recursively.
+
+ Later dictionaries take precedence over earlier ones.
+ Nested dictionaries are merged recursively.
+ UNSET values are skipped.
+ """
+ if not dictionaries:
+ return {}
+ result: dict[str, Any] = {}
+ for d in dictionaries:
+ if d is None:
+ continue
+ for key, value in d.items():
+ if value is UNSET:
+ continue
+ if key in result and isinstance(result[key], dict) and isinstance(value, dict):
+ result[key] = recursive_merge(result[key], value)
+ elif isinstance(value, dict):
+ # Recursively merge dict values to filter out nested UNSET values
+ result[key] = recursive_merge(value)
+ else:
+ result[key] = value
+ return result
diff --git a/src/cooperbench/agents/registry.py b/src/cooperbench/agents/registry.py
index cc14d36..22f71ef 100644
--- a/src/cooperbench/agents/registry.py
+++ b/src/cooperbench/agents/registry.py
@@ -81,6 +81,10 @@ def _auto_register():
import cooperbench.agents.openhands_agent_sdk.adapter # noqa: F401
except ImportError:
pass
+ try:
+ import cooperbench.agents.mini_swe_agent_v2.adapter # noqa: F401
+ except ImportError:
+ pass
# External agents via environment variable
import os