From f06c3196d0621b10d03261e80c6e800a8b2dab7b Mon Sep 17 00:00:00 2001 From: akhatua2 Date: Sat, 14 Feb 2026 20:17:48 -0800 Subject: [PATCH] Adding support for minisweagent_v2 --- pyproject.toml | 3 +- src/cooperbench/agents/__init__.py | 1 + .../mini_swe_agent/environments/modal.py | 3 +- .../agents/mini_swe_agent_v2/__init__.py | 102 ++++ .../agents/mini_swe_agent_v2/adapter.py | 145 +++++ .../mini_swe_agent_v2/agents/__init__.py | 1 + .../mini_swe_agent_v2/agents/default.py | 205 +++++++ .../mini_swe_agent_v2/config/__init__.py | 62 ++ .../agents/mini_swe_agent_v2/config/mini.yaml | 147 +++++ .../mini_swe_agent_v2/connectors/__init__.py | 21 + .../mini_swe_agent_v2/connectors/git.py | 132 +++++ .../connectors/git_servers/__init__.py | 89 +++ .../connectors/git_servers/base.py | 24 + .../connectors/git_servers/docker.py | 175 ++++++ .../connectors/git_servers/gcp.py | 544 ++++++++++++++++++ .../connectors/git_servers/modal.py | 139 +++++ .../mini_swe_agent_v2/connectors/messaging.py | 113 ++++ .../environments/__init__.py | 1 + .../mini_swe_agent_v2/environments/docker.py | 161 ++++++ .../mini_swe_agent_v2/environments/modal.py | 264 +++++++++ .../agents/mini_swe_agent_v2/exceptions.py | 22 + .../mini_swe_agent_v2/models/__init__.py | 34 ++ .../mini_swe_agent_v2/models/litellm_model.py | 149 +++++ .../models/utils/__init__.py | 0 .../models/utils/actions_toolcall.py | 137 +++++ .../models/utils/anthropic_utils.py | 28 + .../models/utils/cache_control.py | 67 +++ .../models/utils/openai_multimodal.py | 50 ++ .../mini_swe_agent_v2/models/utils/retry.py | 25 + .../mini_swe_agent_v2/utils/__init__.py | 0 .../agents/mini_swe_agent_v2/utils/log.py | 36 ++ .../mini_swe_agent_v2/utils/serialize.py | 29 + src/cooperbench/agents/registry.py | 4 + 33 files changed, 2910 insertions(+), 3 deletions(-) create mode 100644 src/cooperbench/agents/mini_swe_agent_v2/__init__.py create mode 100644 src/cooperbench/agents/mini_swe_agent_v2/adapter.py create mode 100644 
src/cooperbench/agents/mini_swe_agent_v2/agents/__init__.py create mode 100644 src/cooperbench/agents/mini_swe_agent_v2/agents/default.py create mode 100644 src/cooperbench/agents/mini_swe_agent_v2/config/__init__.py create mode 100644 src/cooperbench/agents/mini_swe_agent_v2/config/mini.yaml create mode 100644 src/cooperbench/agents/mini_swe_agent_v2/connectors/__init__.py create mode 100644 src/cooperbench/agents/mini_swe_agent_v2/connectors/git.py create mode 100644 src/cooperbench/agents/mini_swe_agent_v2/connectors/git_servers/__init__.py create mode 100644 src/cooperbench/agents/mini_swe_agent_v2/connectors/git_servers/base.py create mode 100644 src/cooperbench/agents/mini_swe_agent_v2/connectors/git_servers/docker.py create mode 100644 src/cooperbench/agents/mini_swe_agent_v2/connectors/git_servers/gcp.py create mode 100644 src/cooperbench/agents/mini_swe_agent_v2/connectors/git_servers/modal.py create mode 100644 src/cooperbench/agents/mini_swe_agent_v2/connectors/messaging.py create mode 100644 src/cooperbench/agents/mini_swe_agent_v2/environments/__init__.py create mode 100644 src/cooperbench/agents/mini_swe_agent_v2/environments/docker.py create mode 100644 src/cooperbench/agents/mini_swe_agent_v2/environments/modal.py create mode 100644 src/cooperbench/agents/mini_swe_agent_v2/exceptions.py create mode 100644 src/cooperbench/agents/mini_swe_agent_v2/models/__init__.py create mode 100644 src/cooperbench/agents/mini_swe_agent_v2/models/litellm_model.py create mode 100644 src/cooperbench/agents/mini_swe_agent_v2/models/utils/__init__.py create mode 100644 src/cooperbench/agents/mini_swe_agent_v2/models/utils/actions_toolcall.py create mode 100644 src/cooperbench/agents/mini_swe_agent_v2/models/utils/anthropic_utils.py create mode 100644 src/cooperbench/agents/mini_swe_agent_v2/models/utils/cache_control.py create mode 100644 src/cooperbench/agents/mini_swe_agent_v2/models/utils/openai_multimodal.py create mode 100644 
src/cooperbench/agents/mini_swe_agent_v2/models/utils/retry.py create mode 100644 src/cooperbench/agents/mini_swe_agent_v2/utils/__init__.py create mode 100644 src/cooperbench/agents/mini_swe_agent_v2/utils/log.py create mode 100644 src/cooperbench/agents/mini_swe_agent_v2/utils/serialize.py diff --git a/pyproject.toml b/pyproject.toml index 3c53086..94d4b71 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -85,6 +85,7 @@ swe-agent = [ "unidiff>=0.7", ] gcp = [ + "cooperbench", "google-cloud-batch>=0.17", "google-cloud-compute>=1.0", "google-cloud-storage>=2.0", @@ -186,7 +187,7 @@ exclude_lines = [ ] [tool.uv.sources] -cooperbench = { workspace = true } openhands-sdk = { path = "src/cooperbench/agents/openhands_agent_sdk/openhands-sdk", editable = true } openhands-tools = { path = "src/cooperbench/agents/openhands_agent_sdk/openhands-tools", editable = true } openhands-workspace = { path = "src/cooperbench/agents/openhands_agent_sdk/openhands-workspace", editable = true } +cooperbench = { workspace = true } diff --git a/src/cooperbench/agents/__init__.py b/src/cooperbench/agents/__init__.py index 6ad8c62..631f169 100644 --- a/src/cooperbench/agents/__init__.py +++ b/src/cooperbench/agents/__init__.py @@ -104,6 +104,7 @@ def run( # Add your agent's shorthand here when registering a new adapter AGENT_SHORTHANDS = { "mini_swe_agent": "msa", + "mini_swe_agent_v2": "msa_v2", "swe_agent": "sw", "openhands_sdk": "oh", } diff --git a/src/cooperbench/agents/mini_swe_agent/environments/modal.py b/src/cooperbench/agents/mini_swe_agent/environments/modal.py index 714b2c2..42e54b7 100644 --- a/src/cooperbench/agents/mini_swe_agent/environments/modal.py +++ b/src/cooperbench/agents/mini_swe_agent/environments/modal.py @@ -69,8 +69,7 @@ def _get_or_build_image(image_name: str) -> modal.Image: if image_name in _image_cache: return _image_cache[image_name] - # Build and cache (CACHE_BUST env forces new image hash) TODO: @arpan remove this after testing - image = 
modal.Image.from_registry(image_name).entrypoint([]).env({"COOPERBENCH_CACHE": "v6"}) + image = modal.Image.from_registry(image_name).entrypoint([]) _image_cache[image_name] = image return image diff --git a/src/cooperbench/agents/mini_swe_agent_v2/__init__.py b/src/cooperbench/agents/mini_swe_agent_v2/__init__.py new file mode 100644 index 0000000..207db03 --- /dev/null +++ b/src/cooperbench/agents/mini_swe_agent_v2/__init__.py @@ -0,0 +1,102 @@ +""" +mini-swe-agent v2 - A Minimal AI Agent for Software Engineering (Tool-calling version) + +Source: https://github.com/SWE-agent/mini-swe-agent +Version: 2.1.0 (commit 56613dd) +License: MIT +Copyright (c) 2025 Kilian A. Lieret and Carlos E. Jimenez + +This code is copied directly from mini-swe-agent with modifications: +- Import paths changed from 'minisweagent' to 'cooperbench.agents.mini_swe_agent_v2' +- Removed text-based (regex) action parsing (v1 compat), keeping only tool calling +- Removed interactive agent, non-Docker environments, non-litellm models +- Added inter-agent communication (messaging + git connectors) for CooperBench + +Citation: + @misc{minisweagent2025, + title={mini-swe-agent: A Minimal AI Agent for Software Engineering}, + author={Lieret, Kilian A. and Jimenez, Carlos E.}, + year={2025}, + url={https://github.com/SWE-agent/mini-swe-agent} + } + +This file provides: +- Path settings for global config file & relative directories +- Version numbering +- Protocols for the core components of mini-swe-agent. 
+""" + +__version__ = "2.1.0" + +import os +from pathlib import Path +from typing import Any, Protocol + +import dotenv +from platformdirs import user_config_dir + +from cooperbench.agents.mini_swe_agent_v2.utils.log import logger + +package_dir = Path(__file__).resolve().parent + +global_config_dir = Path(os.getenv("MSWEA_GLOBAL_CONFIG_DIR") or user_config_dir("mini-swe-agent")) +global_config_dir.mkdir(parents=True, exist_ok=True) +global_config_file = Path(global_config_dir) / ".env" + +dotenv.load_dotenv(dotenv_path=global_config_file) + + +# === Protocols === +# You can ignore them unless you want static type checking. + + +class Model(Protocol): + """Protocol for language models.""" + + config: Any + + def query(self, messages: list[dict[str, str]], **kwargs) -> dict: ... + + def format_message(self, **kwargs) -> dict: ... + + def format_observation_messages( + self, message: dict, outputs: list[dict], template_vars: dict | None = None + ) -> list[dict]: ... + + def get_template_vars(self, **kwargs) -> dict[str, Any]: ... + + def serialize(self) -> dict: ... + + +class Environment(Protocol): + """Protocol for execution environments.""" + + config: Any + + def execute(self, action: dict, cwd: str = "") -> dict[str, Any]: ... + + def get_template_vars(self, **kwargs) -> dict[str, Any]: ... + + def serialize(self) -> dict: ... + + +class Agent(Protocol): + """Protocol for agents.""" + + config: Any + + def run(self, task: str, **kwargs) -> dict: ... + + def save(self, path: Path | None, *extra_dicts) -> dict: ... + + +__all__ = [ + "Agent", + "Model", + "Environment", + "package_dir", + "__version__", + "global_config_file", + "global_config_dir", + "logger", +] diff --git a/src/cooperbench/agents/mini_swe_agent_v2/adapter.py b/src/cooperbench/agents/mini_swe_agent_v2/adapter.py new file mode 100644 index 0000000..c9bcbf7 --- /dev/null +++ b/src/cooperbench/agents/mini_swe_agent_v2/adapter.py @@ -0,0 +1,145 @@ +"""Mini-SWE-Agent v2 adapter for CooperBench. 
+ +This adapter wraps the mini-swe-agent v2 framework (tool-calling version) +to conform to the AgentRunner interface used by CooperBench. +""" + +import yaml + +from cooperbench.agents import AgentResult +from cooperbench.agents.mini_swe_agent_v2.agents.default import DefaultAgent +from cooperbench.agents.mini_swe_agent_v2.config import get_config_path +from cooperbench.agents.mini_swe_agent_v2.connectors import GitConnector +from cooperbench.agents.mini_swe_agent_v2.connectors.messaging import MessagingConnector +from cooperbench.agents.mini_swe_agent_v2.models.litellm_model import LitellmModel +from cooperbench.agents.mini_swe_agent_v2.models.utils.actions_toolcall import SEND_MESSAGE_TOOL +from cooperbench.agents.registry import register + + +@register("mini_swe_agent_v2") +class MiniSweAgentV2Runner: + """Adapter for mini-swe-agent v2 framework (tool-calling).""" + + def run( + self, + task: str, + image: str, + *, + agent_id: str = "agent", + model_name: str = "gpt-4o", + agents: list[str] | None = None, + comm_url: str | None = None, + git_server_url: str | None = None, + git_enabled: bool = False, + messaging_enabled: bool = True, + config: dict | None = None, + agent_config: str | None = None, + log_dir: str | None = None, + ) -> AgentResult: + """Run mini-swe-agent v2 on a task.""" + # Always load default config, then merge with any overrides + config_path = get_config_path("mini") + with open(config_path) as f: + default_config = yaml.safe_load(f) + + # Merge passed config overrides into default config + if config is not None: + default_config.update(config) + + agent_cfg = default_config.get("agent", {}) + model_cfg = default_config.get("model", {}) + env_cfg = default_config.get("environment", {}) + backend = default_config.get("backend", "modal") + + # Create environment based on backend + env_kwargs = { + "image": image, + "cwd": "/workspace/repo", + "timeout": 3600, + } + if env_cfg.get("env"): + env_kwargs["env"] = env_cfg["env"] + + if backend == 
"docker": + from cooperbench.agents.mini_swe_agent_v2.environments.docker import DockerEnvironment + + if config and config.get("git_network"): + env_kwargs["network"] = config["git_network"] + env = DockerEnvironment(**env_kwargs) + else: + from cooperbench.agents.mini_swe_agent_v2.environments.modal import ModalEnvironment + + env = ModalEnvironment(**env_kwargs) + + # Capture base commit for patch generation + base_commit_result = env.execute({"command": "git rev-parse HEAD"}) + base_commit = base_commit_result.get("output", "").strip() + + # Setup messaging connector if enabled + comm = None + use_messaging = messaging_enabled and comm_url and agents and len(agents) > 1 + if use_messaging: + comm = MessagingConnector(agent_id=agent_id, agents=agents, url=comm_url) + + # Create LLM model with send_message tool if messaging is enabled + extra_tools = [SEND_MESSAGE_TOOL] if use_messaging else None + model = LitellmModel(model_name=model_name, extra_tools=extra_tools, **model_cfg) + + # Setup git connector if enabled + if git_enabled and git_server_url and agents: + git_connector = GitConnector( + agent_id=agent_id, + agents=agents, + server_url=git_server_url, + ) + git_connector.setup(env) + + # Create agent with template variables for collaboration + extra_vars = { + "agent_id": agent_id if (agents and len(agents) > 1) else None, + "agents": agents if agents else [], + "git_enabled": git_enabled, + "messaging_enabled": messaging_enabled, + } + + agent = DefaultAgent( + model=model, + env=env, + comm=comm, + agent_id=agent_id, + **agent_cfg, + ) + agent.extra_template_vars.update(extra_vars) + + # Run agent + error_msg = None + try: + result = agent.run(task=task) + status = result.get("exit_status", "Submitted") + except Exception as e: + status = "Error" + error_msg = str(e) + + # Extract patch (committed + uncommitted changes) + patch = self._get_patch(env, base_commit) + + # Cleanup + env.cleanup() + + return AgentResult( + status=status, + patch=patch, + 
cost=agent.cost, + steps=agent.n_calls, + messages=agent.messages, + sent_messages=agent.sent_messages, + error=error_msg, + ) + + def _get_patch(self, env, base_commit: str) -> str: + """Extract git diff from base commit to current working tree state.""" + try: + result = env.execute({"command": f"git diff {base_commit}"}) + return result.get("output", "").strip() + except Exception: + return "" diff --git a/src/cooperbench/agents/mini_swe_agent_v2/agents/__init__.py b/src/cooperbench/agents/mini_swe_agent_v2/agents/__init__.py new file mode 100644 index 0000000..d2df172 --- /dev/null +++ b/src/cooperbench/agents/mini_swe_agent_v2/agents/__init__.py @@ -0,0 +1 @@ +"""Agent implementations for mini-SWE-agent v2.""" diff --git a/src/cooperbench/agents/mini_swe_agent_v2/agents/default.py b/src/cooperbench/agents/mini_swe_agent_v2/agents/default.py new file mode 100644 index 0000000..75e2f47 --- /dev/null +++ b/src/cooperbench/agents/mini_swe_agent_v2/agents/default.py @@ -0,0 +1,205 @@ +"""Basic agent class. See https://mini-swe-agent.com/latest/advanced/control_flow/ for visual explanation +or https://minimal-agent.com for a tutorial on the basic building principles. 
+""" + +import json +import logging +import traceback +from pathlib import Path + +from jinja2 import StrictUndefined, Template +from pydantic import BaseModel + +from cooperbench.agents.mini_swe_agent_v2 import Environment, Model, __version__ +from cooperbench.agents.mini_swe_agent_v2.connectors.messaging import MessagingConnector +from cooperbench.agents.mini_swe_agent_v2.exceptions import InterruptAgentFlow, LimitsExceeded +from cooperbench.agents.mini_swe_agent_v2.utils.serialize import recursive_merge + + +class AgentConfig(BaseModel): + """Check the config files in config/ for example settings.""" + + system_template: str + """Template for the system message (the first message).""" + instance_template: str + """Template for the first user message specifying the task (the second message overall).""" + step_limit: int = 0 + """Maximum number of steps the agent can take.""" + cost_limit: float = 3.0 + """Stop agent after exceeding (!) this cost.""" + output_path: Path | None = None + """Save the trajectory to this path.""" + + +class DefaultAgent: + def __init__( + self, + model: Model, + env: Environment, + *, + comm: MessagingConnector | None = None, + agent_id: str = "agent", + config_class: type = AgentConfig, + **kwargs, + ): + """See the `AgentConfig` class for permitted keyword arguments.""" + self.config = config_class(**kwargs) + self.messages: list[dict] = [] + self.model = model + self.env = env + self.comm = comm + self.agent_id = agent_id + self.extra_template_vars = {} + self.logger = logging.getLogger("agent") + self.cost = 0.0 + self.n_calls = 0 + self.sent_messages: list[dict] = [] + + def log(self, msg: str): + """Log message with agent prefix.""" + self.logger.debug(f"[{self.agent_id}] {msg}") + + def get_template_vars(self, **kwargs) -> dict: + return recursive_merge( + self.config.model_dump(), + self.env.get_template_vars(), + self.model.get_template_vars(), + {"n_model_calls": self.n_calls, "model_cost": self.cost}, + 
self.extra_template_vars, + kwargs, + ) + + def _render_template(self, template: str) -> str: + return Template(template, undefined=StrictUndefined).render(**self.get_template_vars()) + + def add_messages(self, *messages: dict) -> list[dict]: + self.logger.debug(messages) # set log level to debug to see + self.messages.extend(messages) + return list(messages) + + def handle_uncaught_exception(self, e: Exception) -> list[dict]: + return self.add_messages( + self.model.format_message( + role="exit", + content=str(e), + extra={ + "exit_status": type(e).__name__, + "submission": "", + "exception_str": str(e), + "traceback": traceback.format_exc(), + }, + ) + ) + + def run(self, task: str = "", **kwargs) -> dict: + """Run step() until agent is finished. Returns dictionary with exit_status, submission keys.""" + self.extra_template_vars |= {"task": task, **kwargs} + self.messages = [] + self.add_messages( + self.model.format_message(role="system", content=self._render_template(self.config.system_template)), + self.model.format_message(role="user", content=self._render_template(self.config.instance_template)), + ) + while True: + try: + self.step() + except InterruptAgentFlow as e: + self.add_messages(*e.messages) + except Exception as e: + self.handle_uncaught_exception(e) + raise + finally: + self.save(self.config.output_path) + if self.messages[-1].get("role") == "exit": + break + return self.messages[-1].get("extra", {}) + + def step(self) -> list[dict]: + """Query the LM, execute actions. 
Polls for inter-agent messages before querying.""" + # Check for inter-agent messages before querying LLM + if self.comm: + messages = self.comm.receive() + for msg in messages: + ts = msg.get("timestamp", "")[:19].replace("T", " ") + self.log(f"INBOX: [{msg['from']} @ {ts}] {msg['content']}") + self.add_messages( + self.model.format_message( + role="user", + content=f"[Message from {msg['from']}]: {msg['content']}", + ) + ) + return self.execute_actions(self.query()) + + def query(self) -> dict: + """Query the model and return model messages. Override to add hooks.""" + if 0 < self.config.step_limit <= self.n_calls or 0 < self.config.cost_limit <= self.cost: + raise LimitsExceeded( + { + "role": "exit", + "content": "LimitsExceeded", + "extra": {"exit_status": "LimitsExceeded", "submission": ""}, + } + ) + self.n_calls += 1 + message = self.model.query(self.messages) + self.cost += message.get("extra", {}).get("cost", 0.0) + self.add_messages(message) + return message + + def execute_actions(self, message: dict) -> list[dict]: + """Execute actions in message, add observation messages, return them. + + Handles both bash and send_message tool calls. 
+ """ + actions = message.get("extra", {}).get("actions", []) + outputs = [] + for action in actions: + tool_name = action.get("tool_name", "bash") + if tool_name == "send_message" and self.comm: + output = self._handle_send_message(action) + else: + outputs.append(self.env.execute(action)) + continue + outputs.append(output) + return self.add_messages(*self.model.format_observation_messages(message, outputs, self.get_template_vars())) + + def _handle_send_message(self, action: dict) -> dict: + """Handle a send_message tool call via the messaging connector.""" + recipient = action.get("recipient", "") + content = action.get("content", "") + self.comm.send(recipient, content) + self.log(f"SENT to {recipient}: {content[:80]}...") + self.sent_messages.append({"to": recipient, "content": content}) + return {"output": f"Message sent to {recipient}", "returncode": 0} + + def serialize(self, *extra_dicts) -> dict: + """Serialize agent state to a json-compatible nested dictionary for saving.""" + last_message = self.messages[-1] if self.messages else {} + last_extra = last_message.get("extra", {}) + agent_data = { + "info": { + "model_stats": { + "instance_cost": self.cost, + "api_calls": self.n_calls, + }, + "config": { + "agent": self.config.model_dump(mode="json"), + "agent_type": f"{self.__class__.__module__}.{self.__class__.__name__}", + }, + "mini_version": __version__, + "exit_status": last_extra.get("exit_status", ""), + "submission": last_extra.get("submission", ""), + }, + "messages": self.messages, + "trajectory_format": "mini-swe-agent-1.1", + } + return recursive_merge(agent_data, self.model.serialize(), self.env.serialize(), *extra_dicts) + + def save(self, path: Path | None, *extra_dicts) -> dict: + """Save the trajectory of the agent to a file if path is given. Returns full serialized data. + You can pass additional dictionaries with extra data to be (recursively) merged into the output data. 
+ """ + data = self.serialize(*extra_dicts) + if path: + path.parent.mkdir(parents=True, exist_ok=True) + path.write_text(json.dumps(data, indent=2)) + return data diff --git a/src/cooperbench/agents/mini_swe_agent_v2/config/__init__.py b/src/cooperbench/agents/mini_swe_agent_v2/config/__init__.py new file mode 100644 index 0000000..54ef179 --- /dev/null +++ b/src/cooperbench/agents/mini_swe_agent_v2/config/__init__.py @@ -0,0 +1,62 @@ +"""Configuration files and utilities for mini-SWE-agent.""" + +import json +import os +from pathlib import Path + +import yaml + +builtin_config_dir = Path(__file__).parent + + +def get_config_path(config_spec: str | Path) -> Path: + """Get the path to a config file.""" + config_spec = Path(config_spec) + if config_spec.suffix != ".yaml": + config_spec = config_spec.with_suffix(".yaml") + candidates = [ + Path(config_spec), + Path(os.getenv("MSWEA_CONFIG_DIR", ".")) / config_spec, + builtin_config_dir / config_spec, + builtin_config_dir / "extra" / config_spec, + builtin_config_dir / "benchmarks" / config_spec, + ] + for candidate in candidates: + if candidate.exists(): + return candidate + + raise FileNotFoundError(f"Could not find config file for {config_spec} (tried: {candidates})") + + +def _key_value_spec_to_nested_dict(config_spec: str) -> dict: + """Interpret key-value specs from the command line. 
+ + Example: + + "model.model_name=anthropic/claude-sonnet-4-5-20250929" -> + {"model": {"model_name": "anthropic/claude-sonnet-4-5-20250929"}} + """ + key, value = config_spec.split("=", 1) + try: + value = json.loads(value) + except json.JSONDecodeError: + pass + keys = key.split(".") + result = {} + current = result + for k in keys[:-1]: + current[k] = {} + current = current[k] + current[keys[-1]] = value + return result + + +def get_config_from_spec(config_spec: str | Path) -> dict: + """Get a config from a config spec.""" + if isinstance(config_spec, str) and "=" in config_spec: + return _key_value_spec_to_nested_dict(config_spec) + path = get_config_path(config_spec) + return yaml.safe_load(path.read_text()) + + +__all__ = ["builtin_config_dir", "get_config_path", "get_config_from_spec", "_key_value_spec_to_nested_dict"] diff --git a/src/cooperbench/agents/mini_swe_agent_v2/config/mini.yaml b/src/cooperbench/agents/mini_swe_agent_v2/config/mini.yaml new file mode 100644 index 0000000..b52ad8b --- /dev/null +++ b/src/cooperbench/agents/mini_swe_agent_v2/config/mini.yaml @@ -0,0 +1,147 @@ +agent: + system_template: | + You are a helpful assistant that can interact with a computer. + instance_template: | + Please solve this issue: {{task}} + + You can execute bash commands and edit files to implement the necessary changes. + + ## Recommended Workflow + + This workflows should be done step-by-step so that you can iterate on your changes and any possible problems. + + 1. Analyze the codebase by finding and reading relevant files + 2. Create a script to reproduce the issue + 3. Edit the source code to resolve the issue + 4. Verify your fix works by running your script again + 5. Test edge cases to ensure your fix is robust + 6. Submit your changes and finish your work by issuing the following command: `echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT`. + Do not combine it with any other command. After this command, you cannot continue working on this task. 
+ + ## Command Execution Rules + + You are operating in an environment where + + 1. You issue at least one command + 2. The system executes the command(s) in a subshell + 3. You see the result(s) + 4. You write your next command(s) + + Each response should include: + + 1. **Reasoning text** where you explain your analysis and plan + 2. At least one tool call with your command + + **CRITICAL REQUIREMENTS:** + + - Your response SHOULD include reasoning text explaining what you're doing + - Your response MUST include AT LEAST ONE bash tool call + - Directory or environment variable changes are not persistent. Every action is executed in a new subshell. + - However, you can prefix any action with `MY_ENV_VAR=MY_VALUE cd /path/to/working/dir && ...` or write/load environment variables from files + - Submit your changes and finish your work by issuing the following command: `echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT`. + Do not combine it with any other command. After this command, you cannot continue working on this task. + + Example of a CORRECT response: + + I need to understand the structure of the repository first. Let me check what files are in the current directory to get a better understanding of the codebase. + + [Makes bash tool call with {"command": "ls -la"} as arguments] + + + + {{system}} {{release}} {{version}} {{machine}} + + + ## Useful command examples + + ### Create a new file: + + ```bash + cat <<'EOF' > newfile.py + import numpy as np + hello = "world" + print(hello) + EOF + ``` + + ### Edit files with sed: + + {%- if system == "Darwin" -%} + + You are on MacOS. For all the below examples, you need to use `sed -i ''` instead of `sed -i`. 
+ + {%- endif -%} + + ```bash + # Replace all occurrences + sed -i 's/old_string/new_string/g' filename.py + + # Replace only first occurrence + sed -i 's/old_string/new_string/' filename.py + + # Replace first occurrence on line 1 + sed -i '1s/old_string/new_string/' filename.py + + # Replace all occurrences in lines 1-10 + sed -i '1,10s/old_string/new_string/g' filename.py + ``` + + ### View file content: + + ```bash + # View specific lines with numbers + nl -ba filename.py | sed -n '10,20p' + ``` + + ### Any other command you want to run + + ```bash + anything + ``` + step_limit: 0 + cost_limit: 3. + mode: confirm +environment: + env: + PAGER: cat + MANPAGER: cat + LESS: -R + PIP_PROGRESS_BAR: 'off' + TQDM_DISABLE: '1' +model: + observation_template: | + {%- if output.output | length < 10000 -%} + { + "returncode": {{ output.returncode }}, + "output": {{ output.output | tojson }} + {%- if output.exception_info %}, "exception_info": {{ output.exception_info | tojson }}{% endif %} + } + {%- else -%} + { + "returncode": {{ output.returncode }}, + "output_head": {{ output.output[:5000] | tojson }}, + "output_tail": {{ output.output[-5000:] | tojson }}, + "elided_chars": {{ output.output | length - 10000 }}, + "warning": "Output too long." + {%- if output.exception_info %}, "exception_info": {{ output.exception_info | tojson }}{% endif %} + } + {%- endif -%} + format_error_template: | + Tool call error: + + + {{error}} + + + Here is general guidance on how to submit correct toolcalls: + + Every response needs to use the 'bash' tool at least once to execute commands. + + Call the bash tool with your command as the argument: + - Tool: bash + - Arguments: {"command": "your_command_here"} + + If you want to end the task, please issue the following command: `echo COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT` + without any other command. 
+ model_kwargs: + drop_params: true diff --git a/src/cooperbench/agents/mini_swe_agent_v2/connectors/__init__.py b/src/cooperbench/agents/mini_swe_agent_v2/connectors/__init__.py new file mode 100644 index 0000000..adc30bb --- /dev/null +++ b/src/cooperbench/agents/mini_swe_agent_v2/connectors/__init__.py @@ -0,0 +1,21 @@ +"""Connectors for inter-agent communication.""" + +from cooperbench.agents.mini_swe_agent_v2.connectors.git import GitConnector +from cooperbench.agents.mini_swe_agent_v2.connectors.git_servers import ( + DockerGitServer, + GCPGitServer, + GitServer, + ModalGitServer, + create_git_server, +) +from cooperbench.agents.mini_swe_agent_v2.connectors.messaging import MessagingConnector + +__all__ = [ + "DockerGitServer", + "GCPGitServer", + "GitConnector", + "GitServer", + "MessagingConnector", + "ModalGitServer", + "create_git_server", +] diff --git a/src/cooperbench/agents/mini_swe_agent_v2/connectors/git.py b/src/cooperbench/agents/mini_swe_agent_v2/connectors/git.py new file mode 100644 index 0000000..b0781be --- /dev/null +++ b/src/cooperbench/agents/mini_swe_agent_v2/connectors/git.py @@ -0,0 +1,132 @@ +"""Git-based code sharing between agents. + +Enables agents in separate containers to share code via git push/pull. +Uses a shared git server sandbox that agents connect to as a remote. 
+ +Architecture: + +---------------------------------------------------------+ + | Git Server Sandbox | + | git daemon --enable=receive-pack (bare repo) | + +---------------------------------------------------------+ + ^ + +----------------+----------------+ + | | | + git push git fetch git push + git pull git pull + | | | + +---------v----+ +---------v----+ + | Agent A | | Agent B | + | sandbox | | sandbox | + +--------------+ +--------------+ + +Example: + # Create shared git server (once per task) + from cooperbench.agents.mini_swe_agent_v2.connectors.git_servers import get_git_server + + GitServerClass = get_git_server("docker") # or "modal" + git_server = GitServerClass.create(run_id="abc123") + + # Create connector for each agent + git = GitConnector( + agent_id="agent1", + agents=["agent1", "agent2"], + server_url=git_server.url + ) + + # Configure agent's sandbox + git.setup(env) + + # Agent can now use git normally: + # git push team agent1 + # git fetch team + # git merge team/agent2 +""" + +from __future__ import annotations + +import logging +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from cooperbench.agents.mini_swe_agent_v2.environments.docker import DockerEnvironment + + +class GitConnector: + """Configures an agent's sandbox for git collaboration. + + After setup(), the agent can use standard git commands: + - git push team - share changes + - git fetch team - get other agents' branches + - git merge team/ - merge another agent's work + - git branch -r - list remote branches + """ + + # Remote name used in agent's git config + REMOTE_NAME = "team" + + def __init__( + self, + agent_id: str, + agents: list[str], + server_url: str, + ): + """Initialize git connector. 
+ + Args: + agent_id: This agent's unique identifier (e.g., "agent1") + agents: List of all agent IDs in the collaboration + server_url: Git server URL from GitServer.url + """ + self.agent_id = agent_id + self.agents = agents + self.server_url = server_url + self._logger = logging.getLogger("cooperbench.agents.mini_swe_agent_v2.git_connector") + self._initialized = False + + def _exec(self, env: DockerEnvironment, command: str) -> dict: + """Execute a command in the environment (v2 uses dict-based actions).""" + return env.execute({"command": command}) + + def setup(self, env: DockerEnvironment) -> None: + """Configure git remote in the agent's sandbox. + + This sets up the 'team' remote pointing to the shared git server, + creates an agent-specific branch, and pushes the initial state. + + Args: + env: The agent's Docker environment + + Raises: + RuntimeError: If git configuration fails + """ + self._logger.debug(f"Setting up git for {self.agent_id}") + + # Configure git user (needed for commits) + self._exec(env, 'git config user.email "agent@cooperbench.local"') + self._exec(env, f'git config user.name "{self.agent_id}"') + + # Add shared remote + result = self._exec(env, f"git remote add {self.REMOTE_NAME} {self.server_url}") + if result.get("returncode", 0) != 0: + # Remote might already exist + self._exec(env, f"git remote set-url {self.REMOTE_NAME} {self.server_url}") + + # Create agent's branch + self._exec(env, f"git checkout -b {self.agent_id}") + + # Push initial state (first agent initializes the server) + # Use --force in case branch exists from a previous run + result = self._exec(env, f"git push -u {self.REMOTE_NAME} {self.agent_id} --force") + if result.get("returncode", 0) != 0: + self._logger.warning(f"Initial push failed: {result.get('output', '')}") + + # Also push main/master as base reference + self._exec(env, f"git push {self.REMOTE_NAME} HEAD:refs/heads/main --force 2>/dev/null || true") + + self._initialized = True + 
def create_git_server(
    backend: str,
    run_id: str,
    *,
    app: modal.App | None = None,
    timeout: int = 3600,
    # GCP-specific options
    project_id: str | None = None,
    zone: str = "us-central1-a",
    machine_type: str = "e2-micro",
    network: str | None = None,
) -> ModalGitServer | DockerGitServer | GCPGitServer:
    """Instantiate a git server on the requested backend.

    Args:
        backend: Backend name ("modal", "docker", or "gcp")
        run_id: Unique run identifier
        app: Modal app (required for the modal backend)
        timeout: Server timeout in seconds
        project_id: GCP project ID (gcp backend only)
        zone: GCP zone (gcp backend only)
        machine_type: GCP machine type (gcp backend only)
        network: VPC network name (gcp backend only, for agent connectivity)

    Returns:
        A started git server, ready to accept connections; call
        ``server.cleanup()`` when finished with it.

    Raises:
        ValueError: For an unknown backend, or modal backend without an app.
    """
    # Guard-clause dispatch: each recognized backend returns immediately.
    if backend == "docker":
        return DockerGitServer.create(run_id=run_id, timeout=timeout)

    if backend == "modal":
        if app is None:
            raise ValueError("Modal backend requires 'app' parameter")
        return ModalGitServer.create(app=app, run_id=run_id, timeout=timeout)

    if backend == "gcp":
        return GCPGitServer.create(
            run_id=run_id,
            project_id=project_id,
            zone=zone,
            machine_type=machine_type,
            network=network,
            timeout=timeout,
        )

    raise ValueError(f"Unknown git server backend: '{backend}'. Available: docker, modal, gcp")
class GitServer(Protocol):
    """Structural interface implemented by every git server backend.

    A git server hosts one shared repository that agents push to and pull
    from; concrete backends run it on Docker, Modal, GCP VMs, and so on.
    """

    @property
    def url(self) -> str:
        """Remote URL agents should configure.

        Returns:
            Git URL for the repository (e.g., git://hostname:port/repo.git)
        """
        ...

    def cleanup(self) -> None:
        """Tear down whatever resources back this server."""
        ...
class DockerGitServer:
    """Shared git server container for code collaboration using Docker.

    Creates a Docker container running git-daemon that agents can push/pull to.
    """

    def __init__(self, container, hostname: str, port: int, network_name: str):
        """Initialize with an existing container.

        Use DockerGitServer.create() to create a new server.
        """
        self._container = container
        self._hostname = hostname
        self._port = port
        self._network_name = network_name
        self._logger = logging.getLogger("cooperbench.agents.mini_swe_agent_v2.git_server.docker")

    @classmethod
    def create(
        cls,
        run_id: str,
        timeout: int = 3600,
    ) -> DockerGitServer:
        """Create and start a git server container.

        Args:
            run_id: Unique run identifier (for container naming)
            timeout: Container timeout in seconds (not enforced, for compatibility)

        Returns:
            DockerGitServer instance ready to accept connections
        """
        logger = logging.getLogger("cooperbench.agents.mini_swe_agent_v2.git_server.docker")
        logger.debug(f"Creating docker git server for run {run_id}")

        client = docker.from_env()

        # Use a simple Debian-based image with git
        image = "debian:bookworm-slim"

        # Pull image if not present
        try:
            client.images.get(image)
        except docker.errors.ImageNotFound:
            logger.debug(f"Pulling image {image}")
            client.images.pull(image)

        # Create or get shared network for git server and agents
        network_name = f"cooperbench-git-{run_id}"
        try:
            client.networks.get(network_name)
        except docker.errors.NotFound:
            client.networks.create(network_name, driver="bridge")

        # Container name based on run_id
        container_name = f"cooperbench-git-{run_id}"

        # Remove existing container if it exists
        try:
            old_container = client.containers.get(container_name)
            old_container.remove(force=True)
        except docker.errors.NotFound:
            pass

        # Create and start container with initialization script
        # The script initializes the repo, then starts git daemon in foreground to keep container alive
        init_script = """#!/bin/bash
set -e
apt-get update -qq
apt-get install -y -qq git > /dev/null 2>&1
mkdir -p /git/repo.git
cd /git/repo.git
git init --bare
git config receive.denyCurrentBranch ignore
touch git-daemon-export-ok
exec git daemon --reuseaddr --export-all --enable=receive-pack --base-path=/git --listen=0.0.0.0 /git
"""

        container = client.containers.run(
            image=image,
            command=["bash", "-c", init_script],
            name=container_name,
            detach=True,
            network=network_name,
            ports={"9418/tcp": None},  # Auto-assign port for host access
            remove=False,
        )

        # Wait for container to start and git daemon to initialize
        # NOTE(review): fixed 3s sleep — apt-get install inside the container may
        # take longer on slow hosts; consider polling the daemon port instead.
        time.sleep(3)

        # Verify container is running
        container.reload()
        if container.status != "running":
            logs = container.logs().decode("utf-8", errors="replace")
            container.remove(force=True)
            raise RuntimeError(f"Git server container failed to start. Logs: {logs}")

        # Reload container to get port mapping
        container.reload()

        # Get the host port (only validated here; agents reach the daemon via the
        # container IP on the shared network, not the host-mapped port)
        port_bindings = container.attrs.get("NetworkSettings", {}).get("Ports", {})
        if "9418/tcp" not in port_bindings or not port_bindings["9418/tcp"]:
            container.stop()
            container.remove(force=True)
            raise RuntimeError("Failed to get port mapping for git daemon")

        # Get container's IP on the network for inter-container communication
        container.reload()
        network_settings = container.attrs.get("NetworkSettings", {})
        networks = network_settings.get("Networks", {})
        if network_name in networks:
            container_ip = networks[network_name].get("IPAddress")
            if container_ip:
                hostname = container_ip
            else:
                # Fallback to container name (DNS resolution on same network)
                hostname = container_name
        else:
            # Fallback to container name
            hostname = container_name

        logger.debug(f"Git server ready at git://{hostname}:9418 (network: {network_name})")

        return cls(container=container, hostname=hostname, port=9418, network_name=network_name)

    @property
    def url(self) -> str:
        """Git URL for agents to use as remote.

        Returns:
            Git URL for the repository (git://hostname:port/repo.git)
        """
        return f"git://{self._hostname}:{self._port}/repo.git"

    @property
    def network_name(self) -> str:
        """Docker network name for agent containers to join."""
        return self._network_name

    def cleanup(self) -> None:
        """Stop and remove the git server container and network."""
        # Best-effort teardown: each step swallows errors so a partial failure
        # does not prevent the remaining resources from being released.
        if self._container:
            try:
                self._container.stop(timeout=5)
            except Exception:
                pass
            try:
                self._container.remove(force=True)
            except Exception:
                pass
            self._container = None

        # Clean up network
        if hasattr(self, "_network_name") and self._network_name:
            try:
                client = docker.from_env()
                try:
                    network = client.networks.get(self._network_name)
                    network.remove()
                except docker.errors.NotFound:
                    pass
            except Exception:
                pass
# NOTE(review): this class relies on both the google-cloud-compute client and
# the gcloud CLI being available (the CLI is used for SSH readiness probes).
class GCPGitServer:
    """Shared git server on GCP VM for code collaboration.

    Creates a GCP VM running git-daemon that agents can push/pull to.
    Supports VPC networking for agents on the same network.

    Example:
        server = GCPGitServer.create(
            run_id="my-run",
            project_id="my-project",
            network="cooperbench-vpc",
        )
        print(server.url)  # git://10.128.0.5:9418/repo.git
        print(server.network_name)  # cooperbench-vpc
        # ... agents push/pull ...
        server.cleanup()
    """

    def __init__(
        self,
        *,
        vm_name: str,
        project_id: str,
        zone: str,
        network: str | None,
        git_url: str,
    ):
        """Initialize with existing VM info.

        Use GCPGitServer.create() to create a new server.
        """
        self._vm_name = vm_name
        self._project_id = project_id
        self._zone = zone
        self._network = network
        self._git_url = git_url
        # Lifecycle flags: cleanup() only deletes what was actually created.
        self._vm_created = False
        self._firewall_created = False
        self._compute_client = None
        self._logger = logging.getLogger("cooperbench.agents.mini_swe_agent_v2.git_server.gcp")

    @classmethod
    def create(
        cls,
        run_id: str,
        *,
        project_id: str | None = None,
        zone: str = "us-central1-a",
        machine_type: str = "e2-micro",
        network: str | None = None,
        timeout: int = 3600,
    ) -> GCPGitServer:
        """Create and start a git server VM.

        Args:
            run_id: Unique run identifier
            project_id: GCP project ID (defaults to env/gcloud config)
            zone: GCP zone for the VM
            machine_type: VM machine type (e2-micro is smallest/cheapest)
            network: VPC network name for agent connectivity (None for external IP)
            timeout: VM timeout in seconds (not enforced, for reference)

        Returns:
            GCPGitServer instance ready to accept connections
        """
        logger = logging.getLogger("cooperbench.agents.mini_swe_agent_v2.git_server.gcp")
        logger.debug(f"Creating GCP git server for run {run_id}")

        # Resolve project ID
        resolved_project_id = project_id or cls._get_default_project()
        if not resolved_project_id:
            raise ValueError(
                "project_id required. Set via parameter, GOOGLE_CLOUD_PROJECT env var, "
                "or gcloud config set project "
            )

        # Generate unique VM name (sanitize run_id for GCP naming rules)
        sanitized_run_id = run_id.replace("_", "-").lower()[:20]
        vm_name = f"cooperbench-git-{sanitized_run_id}-{uuid.uuid4().hex[:8]}"

        instance = cls(
            vm_name=vm_name,
            project_id=resolved_project_id,
            zone=zone,
            network=network,
            git_url="",  # Will be set after VM is created
        )

        # Create VM and get IP
        instance._create_git_server_vm(machine_type)
        instance._wait_for_ssh()

        # Create firewall rule to allow git protocol traffic
        instance._create_firewall_rule()

        # Get the git URL (internal IP if VPC, external IP otherwise)
        ip_address = instance._get_vm_ip(use_internal=network is not None)
        instance._git_url = f"git://{ip_address}:9418/repo.git"

        logger.debug(f"Git server ready at {instance._git_url}")
        return instance

    @staticmethod
    def _get_default_project() -> str | None:
        """Get default GCP project from environment or gcloud config.

        Resolution order: env vars, cooperbench config, then the gcloud CLI.
        Returns None if no project can be determined.
        """
        import os

        # Try environment variable first
        project = os.environ.get("GOOGLE_CLOUD_PROJECT") or os.environ.get("GCLOUD_PROJECT")
        if project:
            return project

        # Try cooperbench config
        try:
            from cooperbench.config import ConfigManager

            config = ConfigManager()
            project = config.get("gcp_project_id")
            if project:
                return project
        except Exception:
            pass

        # Try gcloud config
        try:
            result = subprocess.run(
                ["gcloud", "config", "get-value", "project"],
                capture_output=True,
                text=True,
                timeout=10,
            )
            if result.returncode == 0 and result.stdout.strip():
                return result.stdout.strip()
        except Exception:
            pass

        return None

    def _get_compute_client(self):
        """Get or create Compute Engine client (lazily imported and cached)."""
        if self._compute_client is None:
            from google.cloud import compute_v1

            self._compute_client = compute_v1.InstancesClient()
        return self._compute_client

    def _create_git_server_vm(self, machine_type: str):
        """Create the GCP VM with git-daemon."""
        from google.cloud import compute_v1

        self._logger.debug(f"Creating GCP VM {self._vm_name} in {self._zone}")

        client = self._get_compute_client()

        # Startup script to install git and start git-daemon
        startup_script = """#!/bin/bash
set -e

# Install git
apt-get update && apt-get install -y git

# Create bare repo
mkdir -p /git/repo.git
cd /git/repo.git
git init --bare
git config receive.denyCurrentBranch ignore
touch git-daemon-export-ok

# Start git daemon
# --enable=receive-pack allows pushing
# --export-all exports all repos
# --base-path=/git means URL /repo.git maps to /git/repo.git
# --reuseaddr allows quick restarts
git daemon \
    --reuseaddr \
    --export-all \
    --enable=receive-pack \
    --base-path=/git \
    --listen=0.0.0.0 \
    /git &

echo "Git daemon started"
"""

        # Build instance configuration
        instance = compute_v1.Instance()
        instance.name = self._vm_name
        instance.machine_type = f"zones/{self._zone}/machineTypes/{machine_type}"

        # Boot disk with Debian
        disk = compute_v1.AttachedDisk()
        disk.auto_delete = True
        disk.boot = True
        disk.type_ = "PERSISTENT"

        disk_init = compute_v1.AttachedDiskInitializeParams()
        disk_init.disk_size_gb = 10  # Small disk for git server
        disk_init.source_image = "projects/debian-cloud/global/images/family/debian-11"
        disk.initialize_params = disk_init
        instance.disks = [disk]

        # Network interface
        network_interface = compute_v1.NetworkInterface()
        if self._network:
            network_interface.network = f"projects/{self._project_id}/global/networks/{self._network}"
        else:
            network_interface.network = f"projects/{self._project_id}/global/networks/default"

        # External IP for SSH access (and git access if no VPC)
        access_config = compute_v1.AccessConfig()
        access_config.name = "External NAT"
        access_config.type_ = "ONE_TO_ONE_NAT"
        network_interface.access_configs = [access_config]
        instance.network_interfaces = [network_interface]

        # Service account with minimal scopes
        service_account = compute_v1.ServiceAccount()
        service_account.email = "default"
        service_account.scopes = [
            "https://www.googleapis.com/auth/logging.write",
        ]
        instance.service_accounts = [service_account]

        # Metadata with startup script
        metadata = compute_v1.Metadata()
        metadata.items = [
            compute_v1.Items(key="startup-script", value=startup_script),
        ]
        instance.metadata = metadata

        # Tags for firewall rules
        tags = compute_v1.Tags()
        tags.items = [f"cooperbench-git-{self._vm_name}"]
        instance.tags = tags

        # Create the instance
        request = compute_v1.InsertInstanceRequest()
        request.project = self._project_id
        request.zone = self._zone
        request.instance_resource = instance

        operation = client.insert(request=request)

        # Mark VM as created immediately after insert succeeds
        # This ensures cleanup() will delete the VM even if _wait_for_operation times out
        self._vm_created = True

        # Wait for operation to complete
        self._wait_for_operation(operation.name)
        self._logger.debug(f"VM {self._vm_name} created successfully")

    def _wait_for_operation(self, operation_name: str, timeout: int = 300):
        """Wait for a zone operation to complete.

        Polls every 2s until DONE or the timeout elapses.
        """
        from google.cloud import compute_v1

        client = compute_v1.ZoneOperationsClient()
        start_time = time.time()

        while time.time() - start_time < timeout:
            operation = client.get(
                project=self._project_id,
                zone=self._zone,
                operation=operation_name,
            )

            if operation.status == compute_v1.Operation.Status.DONE:
                # NOTE(review): proto-plus messages can be truthy even when
                # empty — confirm this only raises on genuine failures.
                if operation.error:
                    errors = [e.message for e in operation.error.errors]
                    raise RuntimeError(f"VM operation failed: {errors}")
                return

            time.sleep(2)

        raise TimeoutError(f"Operation {operation_name} timed out after {timeout}s")

    def _wait_for_global_operation(self, operation_name: str, timeout: int = 300):
        """Wait for a global operation to complete.

        Same polling strategy as _wait_for_operation, but for global
        (non-zonal) resources such as firewall rules.
        """
        from google.cloud import compute_v1

        client = compute_v1.GlobalOperationsClient()
        start_time = time.time()

        while time.time() - start_time < timeout:
            operation = client.get(
                project=self._project_id,
                operation=operation_name,
            )

            if operation.status == compute_v1.Operation.Status.DONE:
                if operation.error:
                    errors = [e.message for e in operation.error.errors]
                    raise RuntimeError(f"Global operation failed: {errors}")
                return

            time.sleep(2)

        raise TimeoutError(f"Operation {operation_name} timed out after {timeout}s")

    def _wait_for_ssh(self, timeout: int = 180):
        """Wait for SSH to become available on the VM.

        Uses the gcloud CLI to probe; on success it also waits for the
        startup script's git-daemon before returning.
        """
        self._logger.debug(f"Waiting for SSH on {self._vm_name}...")
        start_time = time.time()

        while time.time() - start_time < timeout:
            try:
                # Try a simple SSH command
                result = subprocess.run(
                    [
                        "gcloud",
                        "compute",
                        "ssh",
                        self._vm_name,
                        f"--zone={self._zone}",
                        f"--project={self._project_id}",
                        "--command=echo ready",
                        "--quiet",
                        "--strict-host-key-checking=no",
                    ],
                    capture_output=True,
                    text=True,
                    timeout=30,
                )
                if result.returncode == 0 and "ready" in result.stdout:
                    self._logger.debug("SSH is ready")
                    # Wait a bit more for startup script to complete
                    self._wait_for_git_daemon()
                    return
            except subprocess.TimeoutExpired:
                pass
            except Exception as e:
                self._logger.debug(f"SSH not ready yet: {e}")

            time.sleep(5)

        raise TimeoutError(f"SSH not available on {self._vm_name} after {timeout}s")

    def _wait_for_git_daemon(self, timeout: int = 120):
        """Wait for git-daemon to start on the VM."""
        self._logger.debug("Waiting for git-daemon to start...")
        start_time = time.time()

        while time.time() - start_time < timeout:
            try:
                result = subprocess.run(
                    [
                        "gcloud",
                        "compute",
                        "ssh",
                        self._vm_name,
                        f"--zone={self._zone}",
                        f"--project={self._project_id}",
                        "--command=pgrep -f git-daemon",
                        "--quiet",
                        "--strict-host-key-checking=no",
                    ],
                    capture_output=True,
                    text=True,
                    timeout=30,
                )
                if result.returncode == 0:
                    self._logger.debug("git-daemon is running")
                    return
            except Exception as e:
                self._logger.debug(f"git-daemon not ready yet: {e}")

            time.sleep(5)

        raise TimeoutError(f"git-daemon not started on {self._vm_name} after {timeout}s")

    def _create_firewall_rule(self):
        """Create firewall rule to allow git protocol traffic."""
        from google.cloud import compute_v1

        firewall_name = f"cooperbench-git-{self._vm_name}"
        self._logger.debug(f"Creating firewall rule {firewall_name}")

        client = compute_v1.FirewallsClient()

        firewall = compute_v1.Firewall()
        firewall.name = firewall_name

        # Use specified VPC or default network
        network_name = self._network or "default"
        firewall.network = f"projects/{self._project_id}/global/networks/{network_name}"

        # Allow TCP port 9418 (git daemon)
        allowed = compute_v1.Allowed()
        # NOTE(review): unusual casing is the generated proto field name for
        # "IPProtocol" in google-cloud-compute — confirm against client version.
        allowed.I_p_protocol = "tcp"
        allowed.ports = ["9418"]
        firewall.allowed = [allowed]

        # Source ranges: internal only for VPC, anywhere for external mode
        if self._network:
            firewall.source_ranges = ["10.0.0.0/8"]
        else:
            firewall.source_ranges = ["0.0.0.0/0"]

        # Target only this VM
        firewall.target_tags = [f"cooperbench-git-{self._vm_name}"]

        request = compute_v1.InsertFirewallRequest()
        request.project = self._project_id
        request.firewall_resource = firewall

        operation = client.insert(request=request)

        # Mark firewall as created immediately
        self._firewall_created = True

        # Wait for operation
        self._wait_for_global_operation(operation.name)
        self._logger.debug(f"Firewall rule {firewall_name} created")

    def _get_vm_ip(self, use_internal: bool = False) -> str:
        """Get the IP address of the VM.

        Args:
            use_internal: If True, return internal IP (for VPC). Otherwise external.

        Returns:
            IP address string

        Raises:
            RuntimeError: If the VM has no network interface / external IP.
        """
        from google.cloud import compute_v1

        client = self._get_compute_client()

        request = compute_v1.GetInstanceRequest()
        request.project = self._project_id
        request.zone = self._zone
        request.instance = self._vm_name

        instance = client.get(request=request)

        if not instance.network_interfaces:
            raise RuntimeError(f"VM {self._vm_name} has no network interfaces")

        network_interface = instance.network_interfaces[0]

        if use_internal:
            # Return internal IP
            return network_interface.network_i_p
        else:
            # Return external IP
            if not network_interface.access_configs:
                raise RuntimeError(f"VM {self._vm_name} has no external IP")
            return network_interface.access_configs[0].nat_i_p

    @property
    def url(self) -> str:
        """Git URL for agents to use as remote.

        Returns:
            Git URL for the repository (git://IP:9418/repo.git)
        """
        return self._git_url

    @property
    def network_name(self) -> str | None:
        """VPC network name for agents to join.

        Returns:
            Network name or None if using external IP
        """
        return self._network

    def cleanup(self) -> None:
        """Delete the VM and firewall rule."""
        # Delete firewall rule first
        if self._firewall_created:
            self._delete_firewall_rule()

        # Delete VM
        if self._vm_created:
            self._delete_vm()

    def _delete_firewall_rule(self):
        """Delete the firewall rule (best-effort: failures only log)."""
        from google.cloud import compute_v1

        firewall_name = f"cooperbench-git-{self._vm_name}"
        self._logger.debug(f"Deleting firewall rule {firewall_name}")

        try:
            client = compute_v1.FirewallsClient()

            request = compute_v1.DeleteFirewallRequest()
            request.project = self._project_id
            request.firewall = firewall_name

            operation = client.delete(request=request)

            try:
                self._wait_for_global_operation(operation.name, timeout=120)
            except TimeoutError:
                self._logger.warning(f"Firewall deletion timed out: {firewall_name}")

            self._firewall_created = False
            self._logger.debug(f"Firewall rule {firewall_name} deleted")
        except Exception as e:
            self._logger.warning(f"Failed to delete firewall rule {firewall_name}: {e}")

    def _delete_vm(self):
        """Delete the VM (best-effort: failures only log)."""
        self._logger.debug(f"Deleting VM {self._vm_name}")

        try:
            from google.cloud import compute_v1

            client = self._get_compute_client()

            request = compute_v1.DeleteInstanceRequest()
            request.project = self._project_id
            request.zone = self._zone
            request.instance = self._vm_name

            operation = client.delete(request=request)

            try:
                self._wait_for_operation(operation.name, timeout=120)
            except TimeoutError:
                # VM deletion initiated, will complete asynchronously
                pass

            self._vm_created = False
            self._logger.debug(f"VM {self._vm_name} deleted")
        except Exception as e:
            self._logger.warning(f"Failed to delete VM {self._vm_name}: {e}")

    def __del__(self):
        """Ensure cleanup on garbage collection.

        Best-effort only: exceptions (including those during interpreter
        shutdown) are deliberately swallowed.
        """
        try:
            self.cleanup()
        except Exception:
            pass
class ModalGitServer:
    """Git server for agent collaboration, hosted in a Modal sandbox.

    The sandbox runs git-daemon behind an unencrypted TCP tunnel so that
    agents can push and pull over the plain git:// protocol.
    """

    def __init__(self, sandbox: modal.Sandbox, hostname: str):
        """Wrap an already-running sandbox.

        Prefer ModalGitServer.create() for provisioning a new server.
        """
        self._sandbox = sandbox
        self._hostname = hostname
        self._logger = logging.getLogger("cooperbench.agents.mini_swe_agent_v2.git_server.modal")

    @classmethod
    def create(
        cls,
        app: modal.App,
        run_id: str,
        timeout: int = 3600,
    ) -> ModalGitServer:
        """Provision a sandbox, initialize a bare repo, and start git-daemon.

        Args:
            app: Modal app to create sandbox in
            run_id: Unique run identifier (for logging)
            timeout: Sandbox timeout in seconds

        Returns:
            ModalGitServer instance ready to accept connections

        Raises:
            RuntimeError: If repo initialization fails or no tunnel is exposed.
        """
        logger = logging.getLogger("cooperbench.agents.mini_swe_agent_v2.git_server.modal")
        logger.debug(f"Creating git server for run {run_id}")

        # Minimal Debian image with git installed.
        base_image = modal.Image.debian_slim().run_commands(
            "apt-get update && apt-get install -y git",
        )

        # Port 9418 is the git protocol port; it must be tunneled unencrypted
        # because git:// does not speak TLS.
        sandbox = modal.Sandbox.create(
            image=base_image,
            app=app,
            timeout=timeout,
            unencrypted_ports=[9418],  # Expose git daemon port via TCP tunnel
        )

        # Create the bare repository the agents will share.
        init_proc = sandbox.exec(
            "bash",
            "-c",
            """
            set -e
            mkdir -p /git/repo.git
            cd /git/repo.git
            git init --bare
            git config receive.denyCurrentBranch ignore
            touch git-daemon-export-ok
            """,
        )
        init_proc.wait()
        if init_proc.returncode != 0:
            raise RuntimeError(f"Failed to init git repo: {init_proc.stderr.read()}")

        # Launch git-daemon in the background:
        # --enable=receive-pack allows pushing, --export-all exports all repos,
        # --base-path=/git maps /repo.git to /git/repo.git, and
        # --listen=0.0.0.0 accepts connections arriving through the tunnel.
        daemon_proc = sandbox.exec(
            "bash",
            "-c",
            """
            git daemon \
                --reuseaddr \
                --export-all \
                --enable=receive-pack \
                --base-path=/git \
                --listen=0.0.0.0 \
                /git &

            # Wait for daemon to start
            sleep 1
            echo "Git daemon started"
            """,
        )
        daemon_proc.stdout.read()
        daemon_proc.wait()

        # Give daemon time to fully initialize
        time.sleep(1)

        # Resolve the externally reachable endpoint for port 9418.
        tunnels = sandbox.tunnels()
        if not tunnels or 9418 not in tunnels:
            raise RuntimeError(f"Failed to get tunnel for port 9418. Available tunnels: {tunnels}")

        tunnel = tunnels[9418]
        # Use the unencrypted endpoint for the git protocol; a tunnel exposes
        # host/port (encrypted) plus unencrypted_host/unencrypted_port.
        endpoint = f"{tunnel.unencrypted_host}:{tunnel.unencrypted_port}"
        logger.debug(f"Using unencrypted tunnel: {endpoint}")
        logger.debug(f"Git server ready at git://{endpoint}")

        return cls(sandbox=sandbox, hostname=endpoint)

    @property
    def url(self) -> str:
        """Git URL for agents to use as remote.

        Returns:
            Git URL for the repository (git://hostname/repo.git)
        """
        return f"git://{self._hostname}/repo.git"

    def cleanup(self) -> None:
        """Terminate the git server sandbox, ignoring termination errors."""
        if self._sandbox:
            try:
                self._sandbox.terminate()
            except Exception:
                pass


# Backwards compatibility alias
GitServer = ModalGitServer
class MessagingConnector:
    """Redis-backed mailbox messaging between collaborating agents.

    Every agent owns a Redis list acting as its inbox; peers push JSON
    messages onto it and the owner drains it with receive().
    """

    def __init__(self, agent_id: str, agents: list[str], url: str = "redis://localhost:6379"):
        """Set up the Redis client and clear this agent's inbox.

        Args:
            agent_id: This agent's unique identifier (e.g., "agent1")
            agents: List of all agent IDs in the collaboration
            url: Redis URL; a trailing "#prefix" namespaces all keys
                (e.g., "redis://host:6379#run:abc")
        """
        self.agent_id = agent_id
        self.agents = agents

        # Split off the optional namespace prefix (format: url#prefix).
        if "#" in url:
            url, namespace = url.split("#", 1)
            self._prefix = namespace + ":"
        else:
            self._prefix = ""

        self._client = redis.from_url(url)
        self._inbox_key = f"{self._prefix}{agent_id}:inbox"

        # Drop anything left over from a previous run.
        self._client.delete(self._inbox_key)

    def setup(self, env: Any) -> None:
        """No-op: messaging is pure Redis and needs nothing in the sandbox.

        Exists only so every connector exposes the same interface.

        Args:
            env: The agent's environment (unused for messaging)
        """
        pass

    def send(self, recipient: str, content: str) -> None:
        """Push a message onto another agent's inbox.

        Args:
            recipient: Target agent's ID
            content: Message content
        """
        payload = json.dumps(
            {
                "from": self.agent_id,
                "to": recipient,
                "content": content,
                "timestamp": datetime.now().isoformat(),
            }
        )
        self._client.rpush(f"{self._prefix}{recipient}:inbox", payload)

    def receive(self) -> list[dict]:
        """Drain and return every pending message in this agent's inbox.

        Returns:
            List of message dicts with from, to, content, timestamp
        """
        drained: list[dict] = []
        while (raw := self._client.lpop(self._inbox_key)) is not None:
            drained.append(json.loads(raw))
        return drained

    def broadcast(self, content: str) -> None:
        """Send the same message to every other agent.

        Args:
            content: Message content
        """
        for peer in self.agents:
            if peer != self.agent_id:
                self.send(peer, content)

    def peek(self) -> int:
        """Count waiting messages without consuming them.

        Returns:
            Number of pending messages
        """
        return self._client.llen(self._inbox_key)
class DockerEnvironmentConfig(BaseModel):
    """Configuration for DockerEnvironment, validated by pydantic.

    NOTE: mutable defaults ({} / ["--rm"] / ["bash", "-lc"]) are safe here
    because pydantic copies field defaults per instance.
    """

    # Docker image to run commands in (required, no default).
    image: str
    cwd: str = "/"
    """Working directory in which to execute commands."""
    env: dict[str, str] = {}
    """Environment variables to set in the container."""
    forward_env: list[str] = []
    """Environment variables to forward to the container.
    Variables are only forwarded if they are set in the host environment.
    In case of conflict with `env`, the `env` variables take precedence.
    """
    timeout: int = 30
    """Timeout for executing commands in the container."""
    # Resolved once at class-definition (import) time, not per instance.
    executable: str = os.getenv("MSWEA_DOCKER_EXECUTABLE", "docker")
    """Path to the docker/container executable."""
    run_args: list[str] = ["--rm"]
    """Additional arguments to pass to the docker/container executable.
    Default is ["--rm"], which removes the container after it exits.
    """
    container_timeout: str = "2h"
    """Max duration to keep container running. Uses the same format as the sleep command."""
    pull_timeout: int = 120
    """Timeout in seconds for pulling images."""
    interpreter: list[str] = ["bash", "-lc"]
    """Interpreter to use to execute commands. Default is ["bash", "-lc"].
    The actual command will be appended as argument to this. Override this to e.g., modify shell flags
    (e.g., to remove the `-l` flag to disable login shell) or to use python instead of bash to interpret commands.
    """
    def execute(self, action: dict, cwd: str = "", *, timeout: int | None = None) -> dict[str, Any]:
        """Execute a command in the Docker container and return the result as a dict.

        Args:
            action: Action dict; the shell command is read from action["command"].
            cwd: Working directory override; falls back to config.cwd.
            timeout: Per-command timeout override; falls back to config.timeout.

        Returns:
            Dict with "output", "returncode", "exception_info" (and "extra"
            with exception details on failure).

        Raises:
            Submitted: via _check_finished when the output carries the
                COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT sentinel.
        """
        command = action.get("command", "")
        cwd = cwd or self.config.cwd
        assert self.container_id, "Container not started"

        # Build `docker exec` argv; forwarded host vars first, then explicit
        # config.env entries so that `env` takes precedence on conflict.
        cmd = [self.config.executable, "exec", "-w", cwd]
        for key in self.config.forward_env:
            if (value := os.getenv(key)) is not None:
                cmd.extend(["-e", f"{key}={value}"])
        for key, value in self.config.env.items():
            cmd.extend(["-e", f"{key}={value}"])
        cmd.extend([self.container_id, *self.config.interpreter, command])

        try:
            # stderr is merged into stdout so callers see one interleaved stream.
            result = subprocess.run(
                cmd,
                text=True,
                timeout=timeout or self.config.timeout,
                encoding="utf-8",
                errors="replace",
                stdout=subprocess.PIPE,
                stderr=subprocess.STDOUT,
            )
            output = {"output": result.stdout, "returncode": result.returncode, "exception_info": ""}
        except Exception as e:
            # TimeoutExpired (and similar) may carry partial output as bytes.
            raw_output = getattr(e, "output", None)
            raw_output = (
                raw_output.decode("utf-8", errors="replace") if isinstance(raw_output, bytes) else (raw_output or "")
            )
            output = {
                "output": raw_output,
                "returncode": -1,
                "exception_info": f"An error occurred while executing the command: {e}",
                "extra": {"exception_type": type(e).__name__, "exception": str(e)},
            }
        self._check_finished(output)
        return output
def _get_or_build_image(image_name: str) -> modal.Image:
    """Return the cached Modal image for *image_name*, building it at most once.

    Thread-safe: a per-image lock serializes the build, while a lock-free
    fast path serves cache hits without contention.
    """
    # Fast path: already built by some thread.
    cached = _image_cache.get(image_name)
    if cached is not None:
        return cached

    # Obtain (or lazily create) this image's dedicated lock.
    with _cache_lock:
        lock = _image_locks.setdefault(image_name, threading.Lock())

    with lock:
        # Double-check: another thread may have finished the build while we waited.
        cached = _image_cache.get(image_name)
        if cached is not None:
            return cached
        built = modal.Image.from_registry(image_name).entrypoint([])
        _image_cache[image_name] = built
        return built
    def _start_sandbox_with_retry(self):
        """Create sandbox with retry logic for transient failures.

        Retries up to config.max_retries times with exponential backoff
        (retry_delay * 2**attempt). After ClientClosed or image-build
        failures, the global client/image cache is reset so the next
        attempt starts from a fresh connection.

        Raises:
            Exception: the last creation error, once retries are exhausted
                or a non-retryable error occurs.
        """
        last_error = None
        for attempt in range(self.config.max_retries):
            try:
                self._start_sandbox()
                return
            except Exception as e:
                last_error = e
                error_str = str(e) + str(type(e).__name__)

                if attempt < self.config.max_retries - 1 and self._is_retryable(e):
                    delay = self.config.retry_delay * (2**attempt)
                    self.logger.warning(
                        f"Sandbox creation failed (attempt {attempt + 1}/{self.config.max_retries}): {e}. "
                        f"Retrying in {delay:.1f}s..."
                    )
                    time.sleep(delay)
                    # Stale client or broken cached image: reset so the retry rebuilds both.
                    if "ClientClosed" in error_str or "Image build" in error_str:
                        self._reset_client()
                else:
                    # Non-retryable, or out of attempts: re-raise below.
                    break

        # NOTE(review): if max_retries < 1 the loop never runs and this raises
        # None (a TypeError) — presumably max_retries >= 1 always (default 5); confirm.
        raise last_error
    def _check_finished(self, output: dict):
        """Raises Submitted if the output indicates task completion.

        Completion is signaled by a zero return code whose first output line
        is the COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT sentinel; everything
        after that line is treated as the submission body.
        """
        # Imported lazily to avoid a module-level import cycle — mirrors the
        # identical check in the Docker environment.
        from cooperbench.agents.mini_swe_agent_v2.exceptions import Submitted

        # keepends=True so the submission is rejoined with original newlines.
        lines = output.get("output", "").lstrip().splitlines(keepends=True)
        if lines and lines[0].strip() == "COMPLETE_TASK_AND_SUBMIT_FINAL_OUTPUT" and output["returncode"] == 0:
            submission = "".join(lines[1:])
            raise Submitted(
                {
                    "role": "exit",
                    "content": submission,
                    "extra": {"exit_status": "Submitted", "submission": submission},
                }
            )
class GlobalModelStats:
    """Thread-safe, process-wide tracker of model call count and total cost.

    Limits are read from the MSWEA_GLOBAL_COST_LIMIT and
    MSWEA_GLOBAL_CALL_LIMIT environment variables at construction time;
    a value of 0 disables the corresponding limit.
    """

    def __init__(self):
        self._cost = 0.0
        self._n_calls = 0
        self._lock = threading.Lock()
        # 0 means "no limit" for both knobs.
        self.cost_limit = float(os.getenv("MSWEA_GLOBAL_COST_LIMIT", "0"))
        self.call_limit = int(os.getenv("MSWEA_GLOBAL_CALL_LIMIT", "0"))

    def add(self, cost: float) -> None:
        """Record one model call costing *cost*; raise once a limit is hit.

        Raises:
            RuntimeError: when the accumulated cost exceeds cost_limit, or
                when the call count reaches call_limit (the check compares
                n_calls + 1 against the limit).
        """
        with self._lock:
            self._cost += cost
            self._n_calls += 1
            over_cost = 0 < self.cost_limit < self._cost
            over_calls = 0 < self.call_limit < self._n_calls + 1
            if over_cost or over_calls:
                raise RuntimeError(f"Global cost/call limit exceeded: ${self._cost:.4f} / {self._n_calls}")

    @property
    def cost(self) -> float:
        """Total accumulated cost across all recorded calls."""
        return self._cost

    @property
    def n_calls(self) -> int:
        """Total number of recorded calls."""
        return self._n_calls


# Shared singleton used by all models in this process.
GLOBAL_MODEL_STATS = GlobalModelStats()
class LitellmModelConfig(BaseModel):
    """Configuration for `LitellmModel` (validated by pydantic)."""

    model_name: str
    """Model name. Highly recommended to include the provider in the model name, e.g., `anthropic/claude-sonnet-4-5-20250929`."""
    model_kwargs: dict[str, Any] = {}
    """Additional arguments passed to the API."""
    litellm_model_registry: Path | str | None = os.getenv("LITELLM_MODEL_REGISTRY_PATH")
    """Model registry for cost tracking and model metadata. See the local model guide (https://mini-swe-agent.com/latest/models/local_models/) for more details."""
    set_cache_control: Literal["default_end"] | None = None
    """Set explicit cache control markers, for example for Anthropic models"""
    cost_tracking: Literal["default", "ignore_errors"] = os.getenv("MSWEA_COST_TRACKING", "default")
    """Cost tracking mode for this model. Can be "default" or "ignore_errors" (ignore errors/missing cost info)"""
    format_error_template: str = "{{ error }}"
    """Template used when the LM's output is not in the expected format."""
    observation_template: str = (
        "{% if output.exception_info %}{{output.exception_info}}\n{% endif %}"
        "{{output.returncode}}\n\n{{output.output}}"
    )
    """Template used to render the observation after executing an action."""
    multimodal_regex: str = ""
    """Regex to extract multimodal content. Empty string disables multimodal processing."""
    def query(self, messages: list[dict[str, str]], **kwargs) -> dict:
        """Query the LM and return the assistant message as a dict.

        The returned dict carries an "extra" key with the parsed actions,
        the raw API response, cost information, and a timestamp.

        Raises:
            Exceptions in `abort_exceptions` propagate immediately; other
            errors are retried via `retry` before being re-raised.
        """
        # Tenacity-style retry loop; on success `response` stays bound after the loop.
        for attempt in retry(logger=logger, abort_exceptions=self.abort_exceptions):
            with attempt:
                response = self._query(self._prepare_messages_for_api(messages), **kwargs)
        cost_output = self._calculate_cost(response)
        # Global accounting — may raise if a process-wide cost/call limit is hit.
        GLOBAL_MODEL_STATS.add(cost_output["cost"])
        message = response.choices[0].message.model_dump()
        message["extra"] = {
            "actions": self._parse_actions(response),
            "response": response.model_dump(),
            **cost_output,
            "timestamp": time.time(),
        }
        return message
Raises FormatError if unknown tool.""" + tool_calls = response.choices[0].message.tool_calls or [] + return parse_toolcall_actions(tool_calls, format_error_template=self.config.format_error_template) + + def format_message(self, **kwargs) -> dict: + return expand_multimodal_content(kwargs, pattern=self.config.multimodal_regex) + + def format_observation_messages( + self, message: dict, outputs: list[dict], template_vars: dict | None = None + ) -> list[dict]: + """Format execution outputs into tool result messages.""" + actions = message.get("extra", {}).get("actions", []) + return format_toolcall_observation_messages( + actions=actions, + outputs=outputs, + observation_template=self.config.observation_template, + template_vars=template_vars, + multimodal_regex=self.config.multimodal_regex, + ) + + def get_template_vars(self, **kwargs) -> dict[str, Any]: + return self.config.model_dump() + + def serialize(self) -> dict: + return { + "info": { + "config": { + "model": self.config.model_dump(mode="json"), + "model_type": f"{self.__class__.__module__}.{self.__class__.__name__}", + }, + } + } diff --git a/src/cooperbench/agents/mini_swe_agent_v2/models/utils/__init__.py b/src/cooperbench/agents/mini_swe_agent_v2/models/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/cooperbench/agents/mini_swe_agent_v2/models/utils/actions_toolcall.py b/src/cooperbench/agents/mini_swe_agent_v2/models/utils/actions_toolcall.py new file mode 100644 index 0000000..47a8a34 --- /dev/null +++ b/src/cooperbench/agents/mini_swe_agent_v2/models/utils/actions_toolcall.py @@ -0,0 +1,137 @@ +"""Parse actions & format observations with toolcalls""" + +import json +import time + +from jinja2 import StrictUndefined, Template + +from cooperbench.agents.mini_swe_agent_v2.exceptions import FormatError +from cooperbench.agents.mini_swe_agent_v2.models.utils.openai_multimodal import expand_multimodal_content + +BASH_TOOL = { + "type": "function", + "function": { + "name": 
"bash", + "description": "Execute a bash command", + "parameters": { + "type": "object", + "properties": { + "command": { + "type": "string", + "description": "The bash command to execute", + } + }, + "required": ["command"], + }, + }, +} + +SEND_MESSAGE_TOOL = { + "type": "function", + "function": { + "name": "send_message", + "description": "Send a message to another agent for inter-agent communication", + "parameters": { + "type": "object", + "properties": { + "recipient": { + "type": "string", + "description": "The agent ID to send the message to", + }, + "content": { + "type": "string", + "description": "The message content to send", + }, + }, + "required": ["recipient", "content"], + }, + }, +} + +KNOWN_TOOLS = {"bash", "send_message"} + + +def parse_toolcall_actions(tool_calls: list, *, format_error_template: str) -> list[dict]: + """Parse tool calls from the response. Raises FormatError if unknown tool or invalid args.""" + if not tool_calls: + raise FormatError( + { + "role": "user", + "content": Template(format_error_template, undefined=StrictUndefined).render( + error="No tool calls found in the response. Every response MUST include at least one tool call." + ), + "extra": {"interrupt_type": "FormatError"}, + } + ) + actions = [] + for tool_call in tool_calls: + error_msg = "" + args = {} + try: + args = json.loads(tool_call.function.arguments) + except Exception as e: + error_msg = f"Error parsing tool call arguments: {e}. " + + tool_name = tool_call.function.name + + if tool_name not in KNOWN_TOOLS: + error_msg += f"Unknown tool '{tool_name}'." + elif tool_name == "bash" and "command" not in args: + error_msg += "Missing 'command' argument in bash tool call." + elif tool_name == "send_message": + if "recipient" not in args: + error_msg += "Missing 'recipient' argument in send_message tool call." + if "content" not in args: + error_msg += "Missing 'content' argument in send_message tool call." 
+ + if error_msg: + raise FormatError( + { + "role": "user", + "content": Template(format_error_template, undefined=StrictUndefined).render( + error=error_msg.strip() + ), + "extra": {"interrupt_type": "FormatError"}, + } + ) + + action = {"tool_name": tool_name, "tool_call_id": tool_call.id, **args} + actions.append(action) + return actions + + +def format_toolcall_observation_messages( + *, + actions: list[dict], + outputs: list[dict], + observation_template: str, + template_vars: dict | None = None, + multimodal_regex: str = "", +) -> list[dict]: + """Format execution outputs into tool result messages.""" + not_executed = {"output": "", "returncode": -1, "exception_info": "action was not executed"} + padded_outputs = outputs + [not_executed] * (len(actions) - len(outputs)) + results = [] + for action, output in zip(actions, padded_outputs): + content = Template(observation_template, undefined=StrictUndefined).render( + output=output, **(template_vars or {}) + ) + msg = { + "content": content, + "extra": { + "raw_output": output.get("output", ""), + "returncode": output.get("returncode"), + "timestamp": time.time(), + "exception_info": output.get("exception_info"), + **output.get("extra", {}), + }, + } + if "tool_call_id" in action: + msg["tool_call_id"] = action["tool_call_id"] + msg["role"] = "tool" + else: + msg["role"] = "user" # human issued commands + if multimodal_regex: + msg = expand_multimodal_content(msg, pattern=multimodal_regex) + results.append(msg) + return results diff --git a/src/cooperbench/agents/mini_swe_agent_v2/models/utils/anthropic_utils.py b/src/cooperbench/agents/mini_swe_agent_v2/models/utils/anthropic_utils.py new file mode 100644 index 0000000..6636df2 --- /dev/null +++ b/src/cooperbench/agents/mini_swe_agent_v2/models/utils/anthropic_utils.py @@ -0,0 +1,28 @@ +"""Utilities for Anthropic API compatibility.""" + + +def _is_anthropic_thinking_block(block) -> bool: + """Check if a content block is a thinking-type block.""" + if not 
isinstance(block, dict): + return False + return block.get("type") in ("thinking", "redacted_thinking") + + +def _reorder_anthropic_thinking_blocks(messages: list[dict]) -> list[dict]: + """Reorder thinking blocks so they are not the final block in assistant messages. + + This is an Anthropic API requirement: thinking blocks must come before other blocks. + """ + result = [] + for msg in messages: + if msg.get("role") == "assistant" and isinstance(msg.get("content"), list): + content = msg["content"] + thinking_blocks = [b for b in content if _is_anthropic_thinking_block(b)] + if thinking_blocks: + other_blocks = [b for b in content if not _is_anthropic_thinking_block(b)] + if other_blocks: + msg = {**msg, "content": thinking_blocks + other_blocks} + else: + msg = {**msg, "content": thinking_blocks + [{"type": "text", "text": ""}]} + result.append(msg) + return result diff --git a/src/cooperbench/agents/mini_swe_agent_v2/models/utils/cache_control.py b/src/cooperbench/agents/mini_swe_agent_v2/models/utils/cache_control.py new file mode 100644 index 0000000..9c50ee0 --- /dev/null +++ b/src/cooperbench/agents/mini_swe_agent_v2/models/utils/cache_control.py @@ -0,0 +1,67 @@ +"""Cache control utilities are mostly for Anthropic models. +They are used to explicitly set cache control points. 
+""" + +import copy +import warnings +from typing import Literal + + +def _get_content_text(entry: dict) -> str | None: + if entry["content"] is None: + return None + if isinstance(entry["content"], str): + return entry["content"] + assert len(entry["content"]) == 1, "Expected single message in content" + return entry["content"][0]["text"] + + +def _clear_cache_control(entry: dict) -> None: + if isinstance(entry["content"], list): + assert len(entry["content"]) == 1, "Expected single message in content" + entry["content"][0].pop("cache_control", None) + # Note: entry["content"] can be None for assistant messages with only tool_use + entry.pop("cache_control", None) + + +def _set_cache_control(entry: dict) -> None: + # Handle None content (e.g., assistant messages with only tool_use) + if entry["content"] is None: + entry["cache_control"] = {"type": "ephemeral"} + return + + if not isinstance(entry["content"], list): + entry["content"] = [ # type: ignore + { + "type": "text", + "text": _get_content_text(entry), + "cache_control": {"type": "ephemeral"}, + } + ] + else: + entry["content"][0]["cache_control"] = {"type": "ephemeral"} + if entry["role"] == "tool": + # Workaround for weird bug + entry["content"][0].pop("cache_control", None) + entry["cache_control"] = {"type": "ephemeral"} + + +def set_cache_control( + messages: list[dict], *, mode: Literal["default_end"] | None = "default_end", last_n_messages_offset: int = 0 +) -> list[dict]: + """This messages processor adds manual cache control marks to the messages.""" + if mode is None: + return messages + if mode != "default_end": + raise ValueError(f"Invalid mode: {mode}") + if last_n_messages_offset: + warnings.warn("last_n_messages_offset is deprecated and will be removed in the future. 
It has no effect.") + + messages = copy.deepcopy(messages) + new_messages = [] + for i_entry, entry in enumerate(reversed(messages)): + _clear_cache_control(entry) + if i_entry == 0: + _set_cache_control(entry) + new_messages.append(entry) + return list(reversed(new_messages)) diff --git a/src/cooperbench/agents/mini_swe_agent_v2/models/utils/openai_multimodal.py b/src/cooperbench/agents/mini_swe_agent_v2/models/utils/openai_multimodal.py new file mode 100644 index 0000000..4f76fa5 --- /dev/null +++ b/src/cooperbench/agents/mini_swe_agent_v2/models/utils/openai_multimodal.py @@ -0,0 +1,50 @@ +"""Utilities for handling multimodal content in OpenAI-style messages.""" + +import copy +import re +from typing import Any + +DEFAULT_MULTIMODAL_REGEX = ( + r"(?s)(.+?)(.+?)" +) + + +def _expand_content_string(*, content: str, pattern: str) -> list[dict]: + """Expand a content string, replacing multimodal tags with structured content.""" + matches = list(re.finditer(pattern, content)) + if not matches: + return [{"type": "text", "text": content}] + result = [] + last_end = 0 + for match in matches: + text_before = content[last_end : match.start()] + if text_before: + result.append({"type": "text", "text": text_before}) + content_type = match.group(1).strip() + extracted = match.group(2).strip() + if content_type == "image_url": + result.append({"type": "image_url", "image_url": {"url": extracted}}) + last_end = match.end() + text_after = content[last_end:] + if text_after: + result.append({"type": "text", "text": text_after}) + return result + + +def expand_multimodal_content(content: Any, *, pattern: str) -> Any: + """Recursively expand multimodal content in messages. + Note: Returns copy of content, original content is not modified. 
+ """ + if not pattern: + return content + content = copy.deepcopy(content) + if isinstance(content, str): + return _expand_content_string(content=content, pattern=pattern) + if isinstance(content, list): + return [expand_multimodal_content(item, pattern=pattern) for item in content] + if isinstance(content, dict): + if "content" not in content: + return content + content["content"] = expand_multimodal_content(content["content"], pattern=pattern) + return content + return str(content) diff --git a/src/cooperbench/agents/mini_swe_agent_v2/models/utils/retry.py b/src/cooperbench/agents/mini_swe_agent_v2/models/utils/retry.py new file mode 100644 index 0000000..055d4b6 --- /dev/null +++ b/src/cooperbench/agents/mini_swe_agent_v2/models/utils/retry.py @@ -0,0 +1,25 @@ +"""Retry utility for model queries.""" + +import logging +import os + +from tenacity import Retrying, before_sleep_log, retry_if_not_exception_type, stop_after_attempt, wait_exponential + + +def retry(*, logger: logging.Logger, abort_exceptions: list[type[Exception]]) -> Retrying: + """Thin wrapper around tenacity.Retrying to make use of global config etc. + + Args: + logger: Logger to use for reporting retries + abort_exceptions: Exceptions to abort on. + + Returns: + A tenacity.Retrying object. 
+ """ + return Retrying( + reraise=True, + stop=stop_after_attempt(int(os.getenv("MSWEA_MODEL_RETRY_STOP_AFTER_ATTEMPT", "10"))), + wait=wait_exponential(multiplier=1, min=4, max=60), + before_sleep=before_sleep_log(logger, logging.WARNING), + retry=retry_if_not_exception_type(tuple(abort_exceptions)), + ) diff --git a/src/cooperbench/agents/mini_swe_agent_v2/utils/__init__.py b/src/cooperbench/agents/mini_swe_agent_v2/utils/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/src/cooperbench/agents/mini_swe_agent_v2/utils/log.py b/src/cooperbench/agents/mini_swe_agent_v2/utils/log.py new file mode 100644 index 0000000..8069455 --- /dev/null +++ b/src/cooperbench/agents/mini_swe_agent_v2/utils/log.py @@ -0,0 +1,36 @@ +import logging +from pathlib import Path + +from rich.logging import RichHandler + + +def _setup_root_logger() -> None: + logger = logging.getLogger("mini_swe_agent_v2") + logger.setLevel(logging.DEBUG) + _handler = RichHandler( + show_path=False, + show_time=False, + show_level=False, + markup=True, + ) + _formatter = logging.Formatter("%(name)s: %(levelname)s: %(message)s") + _handler.setFormatter(_formatter) + logger.addHandler(_handler) + + +def add_file_handler(path: Path | str, level: int = logging.DEBUG, *, print_path: bool = True) -> None: + logger = logging.getLogger("mini_swe_agent_v2") + handler = logging.FileHandler(path) + handler.setLevel(level) + formatter = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s") + handler.setFormatter(formatter) + logger.addHandler(handler) + if print_path: + print(f"Logging to '{path}'") + + +_setup_root_logger() +logger = logging.getLogger("mini_swe_agent_v2") + + +__all__ = ["logger"] diff --git a/src/cooperbench/agents/mini_swe_agent_v2/utils/serialize.py b/src/cooperbench/agents/mini_swe_agent_v2/utils/serialize.py new file mode 100644 index 0000000..ccd5947 --- /dev/null +++ b/src/cooperbench/agents/mini_swe_agent_v2/utils/serialize.py @@ -0,0 +1,29 @@ +from 
typing import Any + +UNSET = object() + + +def recursive_merge(*dictionaries: dict | None) -> dict: + """Merge multiple dictionaries recursively. + + Later dictionaries take precedence over earlier ones. + Nested dictionaries are merged recursively. + UNSET values are skipped. + """ + if not dictionaries: + return {} + result: dict[str, Any] = {} + for d in dictionaries: + if d is None: + continue + for key, value in d.items(): + if value is UNSET: + continue + if key in result and isinstance(result[key], dict) and isinstance(value, dict): + result[key] = recursive_merge(result[key], value) + elif isinstance(value, dict): + # Recursively merge dict values to filter out nested UNSET values + result[key] = recursive_merge(value) + else: + result[key] = value + return result diff --git a/src/cooperbench/agents/registry.py b/src/cooperbench/agents/registry.py index cc14d36..22f71ef 100644 --- a/src/cooperbench/agents/registry.py +++ b/src/cooperbench/agents/registry.py @@ -81,6 +81,10 @@ def _auto_register(): import cooperbench.agents.openhands_agent_sdk.adapter # noqa: F401 except ImportError: pass + try: + import cooperbench.agents.mini_swe_agent_v2.adapter # noqa: F401 + except ImportError: + pass # External agents via environment variable import os