diff --git a/codeclash/arenas/__init__.py b/codeclash/arenas/__init__.py index a955c7e1..9b46f564 100644 --- a/codeclash/arenas/__init__.py +++ b/codeclash/arenas/__init__.py @@ -1,4 +1,5 @@ from codeclash.arenas.arena import CodeArena +from codeclash.arenas.battlecode24.battlecode24 import BattleCode24Arena from codeclash.arenas.battlecode25.battlecode25 import BattleCode25Arena from codeclash.arenas.battlesnake.battlesnake import BattleSnakeArena from codeclash.arenas.bridge.bridge import BridgeArena @@ -14,6 +15,7 @@ from codeclash.arenas.robotrumble.robotrumble import RobotRumbleArena ARENAS = [ + BattleCode24Arena, BattleCode25Arena, BattleSnakeArena, BridgeArena, diff --git a/codeclash/arenas/battlecode24/BattleCode24.Dockerfile b/codeclash/arenas/battlecode24/BattleCode24.Dockerfile new file mode 100644 index 00000000..6cd92cd6 --- /dev/null +++ b/codeclash/arenas/battlecode24/BattleCode24.Dockerfile @@ -0,0 +1,14 @@ +FROM eclipse-temurin:8-jdk + +ENV JAVA_HOME=/opt/java/openjdk +RUN apt-get update && apt-get install -y --no-install-recommends \ + git curl unzip && \ + rm -rf /var/lib/apt/lists/* + +RUN git clone https://github.com/CodeClash-ai/BattleCode2024.git /workspace \ + && cd /workspace \ + && git remote set-url origin https://github.com/CodeClash-ai/BattleCode2024.git +WORKDIR /workspace + +RUN chmod +x gradlew && ./gradlew update + diff --git a/codeclash/arenas/battlecode24/__init__.py b/codeclash/arenas/battlecode24/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/codeclash/arenas/battlecode24/battlecode24.py b/codeclash/arenas/battlecode24/battlecode24.py new file mode 100644 index 00000000..50827ba0 --- /dev/null +++ b/codeclash/arenas/battlecode24/battlecode24.py @@ -0,0 +1,381 @@ +import re +import subprocess +from collections import defaultdict +from concurrent.futures import ThreadPoolExecutor, as_completed +from dataclasses import dataclass, field +from typing import Literal + +from tqdm.auto import tqdm + +from codeclash.agents.player import Player +from codeclash.arenas.arena import CodeArena, RoundStats +from codeclash.constants import DIR_WORK, RESULT_TIE + +BC24_LOG = "sim_{idx}.log" +BC24_FOLDER = "mysubmission" +BC24_TIE = "Reason: The winning team won arbitrarily (coin flip)." + + +@dataclass +class SimulationMeta: + """Metadata for a single simulation, storing team assignments explicitly.""" + idx: int + team_a: str + team_b: str + log_file: str + + +@dataclass +class RoundResult: + """Result of execute_round, used to communicate status to get_results.""" + status: Literal["completed", "auto_win", "no_contest"] + winner: str | None = None + loser: str | None = None + reason: str = "" + simulations: list[SimulationMeta] = field(default_factory=list) + + +class BattleCode24Arena(CodeArena): + """BattleCode24 arena implementation. + + Lifecycle: + 1. validate_code() - Source-level structural checks only (in agent container) + 2. execute_round() - Compile and run simulations (in game container) + 3. get_results() - Parse logs and determine winner + + Failure handling: + - If one agent fails to compile, the other wins automatically + - If both fail to compile, round is a no-contest (tie) + - Individual simulation failures don't count toward either player + """ + + name: str = "BattleCode24" + description: str = """Battlecode 2024: Breadwars is a real-time strategy game where your Java bot controls a team of robots competing to capture the opponent's flags. +Your mission: capture all 3 of the opponent's flags before they capture yours. 
Robots can attack, heal, build traps, dig/fill terrain, and specialize in different skills through experience.
+The game features a setup phase (first 200 rounds) where teams are separated by a dam, followed by open combat. Robots gain experience and level up their attack, build, and heal specializations."""
+    default_args: dict = {
+        "maps": "DefaultSmall",
+    }
+    submission: str = "src/mysubmission"
+
+    def __init__(self, config, **kwargs):
+        super().__init__(config, **kwargs)
+        assert len(config["players"]) == 2, "BattleCode24 is a two-player game"
+
+        # Build base run command
+        self.run_cmd_base: str = "./gradlew --no-daemon run"
+        for arg, val in self.game_config.get("args", self.default_args).items():
+            if isinstance(val, bool):
+                if val:
+                    self.run_cmd_base += f" -P{arg}=true"
+            else:
+                self.run_cmd_base += f" -P{arg}={val}"
+
+        # Round state (set by execute_round, used by get_results)
+        self._round_result: RoundResult | None = None
+
+    def validate_code(self, agent: Player) -> tuple[bool, str | None]:
+        """Validate source structure. No compilation - that happens in execute_round.
+
+        Checks:
+        1. src/mysubmission/ directory exists
+        2. RobotPlayer.java file exists
+        3. run(RobotController rc) method signature present
+        4. Correct package declaration
+        """
+        # Check for mysubmission directory
+        ls_output = agent.environment.execute("ls src")["output"]
+        if BC24_FOLDER not in ls_output:
+            return False, f"There should be a `src/{BC24_FOLDER}/` directory"
+
+        # Check for RobotPlayer.java file
+        ls_mysubmission = agent.environment.execute(f"ls src/{BC24_FOLDER}")["output"]
+        if "RobotPlayer.java" not in ls_mysubmission:
+            return False, f"There should be a `src/{BC24_FOLDER}/RobotPlayer.java` file"
+
+        # Check for run(RobotController rc) method
+        robot_player_content = agent.environment.execute(f"cat src/{BC24_FOLDER}/RobotPlayer.java")["output"]
+        if "public static void run(RobotController" not in robot_player_content:
+            return False, f"There should be a `run(RobotController rc)` method implemented in `src/{BC24_FOLDER}/RobotPlayer.java`"
+
+        # Check for correct package declaration
+        if f"package {BC24_FOLDER};" not in robot_player_content:
+            return False, f"The package declaration should be `package {BC24_FOLDER};` in `src/{BC24_FOLDER}/RobotPlayer.java`"
+
+        return True, None
+
+    def _compile_agent(self, agent: Player, idx: int) -> str | None:
+        """Compile an agent's code in the game container.
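+
+        The flow matches the body below: copy the agent's `src/mysubmission`
+        into the shared workspace, run `./gradlew clean compileJava`, then
+        stash the compiled classes under `/tmp/agent{idx}_classes` so the
+        `gradlew clean` issued for the next agent cannot delete them.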
+ + Args: + agent: The agent to compile + idx: Index for naming the output directory + + Returns: + Path to compiled classes directory, or None if compilation failed + """ + # Copy agent code to workspace + src = f"/{agent.name}/src/{BC24_FOLDER}/" + dest = str(DIR_WORK / "src" / BC24_FOLDER) + self.environment.execute(f"rm -rf {dest}; mkdir -p {dest}; cp -r {src}* {dest}/") + + # Compile (use clean to ensure fresh compilation, avoiding stale cache) + compile_result = self.environment.execute("./gradlew clean compileJava", timeout=120) + if compile_result["returncode"] != 0: + self.logger.warning( + f"Failed to compile agent {agent.name}:\n{compile_result['output'][-1000:]}" + ) + return None + + # Save compiled classes outside build/ (gradle clean deletes build/) + classes_dir = f"/tmp/agent{idx}_classes" + self.environment.execute( + f"rm -rf {classes_dir}; mkdir -p {classes_dir}; cp -r build/classes/* {classes_dir}/" + ) + + self.logger.info(f"Successfully compiled {agent.name}") + return classes_dir + + + def _run_simulation( + self, + sim_meta: SimulationMeta, + agents: list[Player], + agent_classes: dict[str, str], + ) -> None: + """Run a single simulation. + + Args: + sim_meta: Simulation metadata with team assignments + agents: List of agents (for name lookup) + agent_classes: Map of agent name -> compiled classes path + """ + cmd = ( + f"{self.run_cmd_base} " + f"-PteamA={sim_meta.team_a} " + f"-PteamB={sim_meta.team_b} " + f"-PpackageNameA=mysubmission " + f"-PpackageNameB=mysubmission " + f"-PclassLocationA={agent_classes[sim_meta.team_a]} " + f"-PclassLocationB={agent_classes[sim_meta.team_b]}" + ) + + try: + response = self.environment.execute( + cmd + f" > {self.log_env / sim_meta.log_file} 2>&1", + timeout=120, + ) + except subprocess.TimeoutExpired: + self.logger.warning(f"Simulation {sim_meta.idx} timed out") + return + + if response["returncode"] != 0: + self.logger.warning( + f"Simulation {sim_meta.idx} failed with exit code {response['returncode']}" + ) + + def execute_round(self, agents: list[Player]): + """Execute a round: compile all agents, then run simulations. 
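+
+        Runs in three phases, mirroring the inline comments in the body:
+        compile every agent, build SimulationMeta entries whose team A/B
+        positions alternate by simulation index (so neither player always
+        plays the same side), then run the simulations in parallel.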
+ + Handles failures gracefully: + - If one agent fails to compile, the other wins automatically + - If both fail, round is a no-contest + """ + # Phase 1: Compile all agents + agent_classes: dict[str, str | None] = {} + for idx, agent in enumerate(agents): + classes_path = self._compile_agent(agent, idx) + agent_classes[agent.name] = classes_path + + # Check compilation results + compiled_agents = [a for a in agents if agent_classes[a.name] is not None] + failed_agents = [a for a in agents if agent_classes[a.name] is None] + + if len(compiled_agents) == 0: + self.logger.error("All agents failed to compile - no contest") + self._round_result = RoundResult( + status="no_contest", + reason="all agents failed to compile", + ) + return + + if len(compiled_agents) == 1: + winner = compiled_agents[0] + loser = failed_agents[0] + self.logger.info( + f"Only {winner.name} compiled successfully (opponent {loser.name} failed) - automatic win" + ) + self._round_result = RoundResult( + status="auto_win", + winner=winner.name, + loser=loser.name, + reason=f"{loser.name} failed to compile", + ) + return + + # Phase 2: Build simulation metadata with alternating team positions + num_sims = self.game_config["sims_per_round"] + simulations: list[SimulationMeta] = [] + + for idx in range(num_sims): + # Alternate team positions for fairness + if idx % 2 == 0: + team_a, team_b = agents[0].name, agents[1].name + else: + team_a, team_b = agents[1].name, agents[0].name + + simulations.append(SimulationMeta( + idx=idx, + team_a=team_a, + team_b=team_b, + log_file=BC24_LOG.format(idx=idx), + )) + + # Phase 3: Run simulations in parallel + self.logger.info(f"Running {num_sims} simulations with alternating team positions") + + # Filter to only compiled agents' classes + valid_classes = {name: path for name, path in agent_classes.items() if path is not None} + + with ThreadPoolExecutor(5) as executor: + futures = [ + executor.submit(self._run_simulation, sim, agents, valid_classes) + for sim in simulations + ] + for future in tqdm(as_completed(futures), total=len(futures), desc="Simulations"): + try: + future.result() + except Exception as e: + self.logger.error(f"Simulation raised unexpected exception: {e}") + + self._round_result = RoundResult( + status="completed", + simulations=simulations, + ) + + def _parse_simulation_log(self, log_path, sim_meta: SimulationMeta) -> str | None: + """Parse a single simulation log to determine the winner. 
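+
+        A decisive game is expected to end with a server line of the form
+        `[server] mysubmission (A) wins (1234)` followed by a `Reason:` line;
+        a coin-flip reason (BC24_TIE) downgrades the result to a tie.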
+ + Args: + log_path: Path to the log file + sim_meta: Simulation metadata with team assignments + + Returns: + Winner agent name, RESULT_TIE, or None if parsing failed + """ + if not log_path.exists(): + self.logger.debug(f"Simulation {sim_meta.idx}: log file missing") + return None + + with open(log_path) as f: + content = f.read().strip() + + lines = content.split("\n") + if len(lines) < 2: + self.logger.debug(f"Simulation {sim_meta.idx}: log too short (game crashed?)") + return None + + # Find the winner line (contains "wins" and "[server]") + winner_line = None + reason_line = None + for i, line in enumerate(lines): + if "wins" in line and "[server]" in line: + winner_line = line + if i + 1 < len(lines): + reason_line = lines[i + 1] + break + + if not winner_line: + self.logger.debug(f"Simulation {sim_meta.idx}: no winner line found") + return RESULT_TIE + + # Extract A or B from winner line: "mysubmission (A) wins" or "mysubmission (B) wins" + match = re.search(r"\(([AB])\)\s+wins", winner_line) + if not match: + self.logger.debug(f"Simulation {sim_meta.idx}: could not parse winner from line") + return RESULT_TIE + + winner_key = match.group(1) + + # Check for coin flip tie + if reason_line and BC24_TIE in reason_line: + return RESULT_TIE + + # Map A/B to agent names using stored metadata (no recalculation needed) + if winner_key == "A": + return sim_meta.team_a + else: + return sim_meta.team_b + + def get_results(self, agents: list[Player], round_num: int, stats: RoundStats): + """Parse simulation results and determine the round winner.""" + + # Handle early termination cases + if self._round_result is None: + self.logger.error("get_results called but execute_round didn't set _round_result") + stats.winner = RESULT_TIE + return + + if self._round_result.status == "no_contest": + self.logger.info(f"Round ended in no-contest: {self._round_result.reason}") + stats.winner = RESULT_TIE + # Split points evenly + points = self.game_config["sims_per_round"] / len(agents) + for agent in agents: + stats.scores[agent.name] = points + stats.player_stats[agent.name].score = points + stats.player_stats[agent.name].valid_submit = False + stats.player_stats[agent.name].invalid_reason = "Compilation failed (no contest)" + return + + if self._round_result.status == "auto_win": + winner = self._round_result.winner + loser = self._round_result.loser + self.logger.info(f"Round auto-win: {winner} ({self._round_result.reason})") + stats.winner = winner + stats.scores[winner] = self.game_config["sims_per_round"] + stats.player_stats[winner].score = self.game_config["sims_per_round"] + if loser and loser in stats.player_stats: + stats.player_stats[loser].valid_submit = False + stats.player_stats[loser].invalid_reason = f"Compilation failed: {self._round_result.reason}" + return + + # Normal case: parse simulation logs + scores = defaultdict(int) + + tie_count = 0 + for sim in self._round_result.simulations: + log_path = self.log_round(round_num) / sim.log_file + winner = self._parse_simulation_log(log_path, sim) + + if winner is None: + pass + elif winner == RESULT_TIE: + tie_count += 1 + else: + scores[winner] += 1 + + if tie_count > 0: + self.logger.info(f"{tie_count} simulation(s) ended in tie") + + # Determine overall winner + if scores: + # Find max score, check for ties + max_score = max(scores.values()) + leaders = [name for name, score in scores.items() if score == max_score] + + if len(leaders) == 1: + stats.winner = leaders[0] + else: + stats.winner = RESULT_TIE + else: + # All simulations failed 
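+            # (no log yielded a parseable winner line), so there is no
+            # evidence to award the round to either player.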
+ self.logger.warning("All simulations failed to produce results") + stats.winner = RESULT_TIE + + for player, score in scores.items(): + stats.scores[player] = score + if player != RESULT_TIE: + stats.player_stats[player].score = score diff --git a/configs/test/battlecode24.yaml b/configs/test/battlecode24.yaml new file mode 100644 index 00000000..40b17b92 --- /dev/null +++ b/configs/test/battlecode24.yaml @@ -0,0 +1,22 @@ +tournament: + rounds: 3 +game: + name: BattleCode24 + sims_per_round: 20 +players: +- agent: dummy + name: p1 +- agent: dummy + name: p2 +prompts: + game_description: | + You are a software developer ({{player_id}}) competing in a coding game called BattleCode24. + Battlecode 2024: Breadwars is a real-time strategy game where your Java bot controls a team of robots competing to capture the opponent's flags. + Your mission: capture all 3 of the opponent's flags before they capture yours. Robots can attack, heal, build traps, dig/fill terrain, and specialize in different skills through experience. + The game features a setup phase (first 200 rounds) where teams are separated by a dam, followed by open combat. Robots gain experience and level up their attack, build, and heal specializations. + + The game is played in {{rounds}} rounds. For every round, you (and your competitor) edit program code that controls your bot. This is round {{round}}. + After you and your competitor finish editing your codebases, the game is run automatically. + + Your task: improve the bot in `src/mysubmission`, located in {{working_dir}}. + {{working_dir}} is your codebase, which contains both your bot and supporting assets. diff --git a/tests/arenas/test_battlecode24.py b/tests/arenas/test_battlecode24.py new file mode 100644 index 00000000..8e6babfe --- /dev/null +++ b/tests/arenas/test_battlecode24.py @@ -0,0 +1,567 @@ +""" +Unit tests for BattleCode24Arena. + +Tests validate_code(), execute_round(), and get_results() methods without requiring Docker. 
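+Arena instances are built via `__new__` with hand-assigned attributes, and all
+container interaction is stubbed out through MockPlayer command fixtures.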
+""" + +from unittest.mock import MagicMock, patch + +import pytest + +from codeclash.arenas.arena import RoundStats +from codeclash.arenas.battlecode24.battlecode24 import BattleCode24Arena, RoundResult, SimulationMeta +from codeclash.constants import RESULT_TIE + +from .conftest import MockPlayer + +VALID_ROBOT_PLAYER = """ +package mysubmission; + +import battlecode.common.*; + +public class RobotPlayer { + public static void run(RobotController rc) throws GameActionException { + while (true) { + // Game logic here + Clock.yield(); + } + } +} +""" + + +class TestBattleCode24Validation: + """Tests for BattleCode24Arena.validate_code()""" + + @pytest.fixture + def arena(self, tmp_log_dir, minimal_config): + """Create BattleCode24Arena instance with mocked environment.""" + config = minimal_config.copy() + config["game"]["name"] = "BattleCode24" + config["game"]["sims_per_round"] = 10 + config["players"] = [ + {"name": "p1", "agent": "dummy"}, + {"name": "p2", "agent": "dummy"}, + ] + arena = BattleCode24Arena.__new__(BattleCode24Arena) + arena.submission = "src/mysubmission" + arena.log_local = tmp_log_dir + arena.config = config + return arena + + def test_valid_submission(self, arena, mock_player_factory): + """Test that a valid BattleCode24 submission passes validation.""" + player = mock_player_factory( + name="test_player", + files={ + "src/mysubmission/RobotPlayer.java": VALID_ROBOT_PLAYER, + }, + command_outputs={ + "ls src": {"output": "mysubmission\n", "returncode": 0}, + "ls src/mysubmission": {"output": "RobotPlayer.java\n", "returncode": 0}, + "cat src/mysubmission/RobotPlayer.java": {"output": VALID_ROBOT_PLAYER, "returncode": 0}, + }, + ) + is_valid, error = arena.validate_code(player) + assert is_valid is True + assert error is None + + def test_missing_mysubmission_directory(self, arena, mock_player_factory): + """Test validation fails when src/mysubmission/ directory is missing.""" + player = mock_player_factory( + name="test_player", + files={}, + command_outputs={ + "ls src": {"output": "other_dir\n", "returncode": 0}, + }, + ) + is_valid, error = arena.validate_code(player) + assert is_valid is False + assert "src/mysubmission/" in error + + def test_missing_robot_player_file(self, arena, mock_player_factory): + """Test validation fails when RobotPlayer.java is missing.""" + player = mock_player_factory( + name="test_player", + files={}, + command_outputs={ + "ls src": {"output": "mysubmission\n", "returncode": 0}, + "ls src/mysubmission": {"output": "OtherFile.java\n", "returncode": 0}, + }, + ) + is_valid, error = arena.validate_code(player) + assert is_valid is False + assert "RobotPlayer.java" in error + + def test_missing_run_method(self, arena, mock_player_factory): + """Test validation fails when run(RobotController rc) method is missing.""" + invalid_code = """ +package mysubmission; + +import battlecode.common.*; + +public class RobotPlayer { + public static void main(String[] args) { + // Wrong method signature + } +} +""" + player = mock_player_factory( + name="test_player", + files={"src/mysubmission/RobotPlayer.java": invalid_code}, + command_outputs={ + "ls src": {"output": "mysubmission\n", "returncode": 0}, + "ls src/mysubmission": {"output": "RobotPlayer.java\n", "returncode": 0}, + "cat src/mysubmission/RobotPlayer.java": {"output": invalid_code, "returncode": 0}, + }, + ) + is_valid, error = arena.validate_code(player) + assert is_valid is False + assert "run(RobotController" in error + + def test_wrong_package_declaration(self, arena, 
mock_player_factory): + """Test validation fails when package declaration is incorrect.""" + invalid_code = """ +package wrongpackage; + +import battlecode.common.*; + +public class RobotPlayer { + public static void run(RobotController rc) throws GameActionException { + while (true) { + Clock.yield(); + } + } +} +""" + player = mock_player_factory( + name="test_player", + files={"src/mysubmission/RobotPlayer.java": invalid_code}, + command_outputs={ + "ls src": {"output": "mysubmission\n", "returncode": 0}, + "ls src/mysubmission": {"output": "RobotPlayer.java\n", "returncode": 0}, + "cat src/mysubmission/RobotPlayer.java": {"output": invalid_code, "returncode": 0}, + }, + ) + is_valid, error = arena.validate_code(player) + assert is_valid is False + assert "package mysubmission;" in error + + +class TestBattleCode24SimulationParsing: + """Tests for BattleCode24Arena._parse_simulation_log()""" + + @pytest.fixture + def arena(self, tmp_log_dir, minimal_config): + """Create BattleCode24Arena instance.""" + config = minimal_config.copy() + config["game"]["name"] = "BattleCode24" + config["game"]["sims_per_round"] = 10 + config["players"] = [ + {"name": "p1", "agent": "dummy"}, + {"name": "p2", "agent": "dummy"}, + ] + arena = BattleCode24Arena.__new__(BattleCode24Arena) + arena.submission = "src/mysubmission" + arena.log_local = tmp_log_dir + arena.config = config + arena.logger = MagicMock() + return arena + + def test_parse_team_a_wins(self, arena, tmp_log_dir): + """Test parsing when team A wins.""" + log_file = tmp_log_dir / "sim_0.log" + log_file.write_text( + "[server] Game starting\n[server] mysubmission (A) wins (1234)\nReason: Team A captured all flags.\n" + ) + + sim_meta = SimulationMeta(idx=0, team_a="Alice", team_b="Bob", log_file="sim_0.log") + winner = arena._parse_simulation_log(log_file, sim_meta) + assert winner == "Alice" + + def test_parse_team_b_wins(self, arena, tmp_log_dir): + """Test parsing when team B wins.""" + log_file = tmp_log_dir / "sim_0.log" + log_file.write_text( + "[server] Game starting\n[server] mysubmission (B) wins (5678)\nReason: Team B captured all flags.\n" + ) + + sim_meta = SimulationMeta(idx=0, team_a="Alice", team_b="Bob", log_file="sim_0.log") + winner = arena._parse_simulation_log(log_file, sim_meta) + assert winner == "Bob" + + def test_parse_coin_flip_tie(self, arena, tmp_log_dir): + """Test parsing when game ends in a coin flip tie.""" + log_file = tmp_log_dir / "sim_0.log" + log_file.write_text( + "[server] Game starting\n" + "[server] mysubmission (A) wins (1234)\n" + "Reason: The winning team won arbitrarily (coin flip).\n" + ) + + sim_meta = SimulationMeta(idx=0, team_a="Alice", team_b="Bob", log_file="sim_0.log") + winner = arena._parse_simulation_log(log_file, sim_meta) + assert winner == RESULT_TIE + + def test_parse_missing_log_file(self, arena, tmp_log_dir): + """Test parsing when log file doesn't exist (simulation failed).""" + log_file = tmp_log_dir / "nonexistent.log" + + sim_meta = SimulationMeta(idx=0, team_a="Alice", team_b="Bob", log_file="nonexistent.log") + winner = arena._parse_simulation_log(log_file, sim_meta) + assert winner is None + + def test_parse_truncated_log(self, arena, tmp_log_dir): + """Test parsing when log is too short (game crashed early).""" + log_file = tmp_log_dir / "sim_0.log" + log_file.write_text("[server] Starting...\n") + + sim_meta = SimulationMeta(idx=0, team_a="Alice", team_b="Bob", log_file="sim_0.log") + winner = arena._parse_simulation_log(log_file, sim_meta) + assert winner is None + + def 
test_parse_no_winner_line(self, arena, tmp_log_dir): + """Test parsing when winner line is missing.""" + log_file = tmp_log_dir / "sim_0.log" + log_file.write_text("[server] Game starting\n[server] Game ended\nSome other output\n") + + sim_meta = SimulationMeta(idx=0, team_a="Alice", team_b="Bob", log_file="sim_0.log") + winner = arena._parse_simulation_log(log_file, sim_meta) + assert winner == RESULT_TIE + + def test_parse_alternating_team_positions(self, arena, tmp_log_dir): + """Test that team position alternation is correctly handled.""" + # Simulation 0: Alice is A, Bob is B, A wins + log_file_0 = tmp_log_dir / "sim_0.log" + log_file_0.write_text("[server] mysubmission (A) wins (1234)\nReason: Team A won.\n") + sim_meta_0 = SimulationMeta(idx=0, team_a="Alice", team_b="Bob", log_file="sim_0.log") + winner_0 = arena._parse_simulation_log(log_file_0, sim_meta_0) + assert winner_0 == "Alice" + + # Simulation 1: Bob is A, Alice is B, A wins + log_file_1 = tmp_log_dir / "sim_1.log" + log_file_1.write_text("[server] mysubmission (A) wins (5678)\nReason: Team A won.\n") + sim_meta_1 = SimulationMeta(idx=1, team_a="Bob", team_b="Alice", log_file="sim_1.log") + winner_1 = arena._parse_simulation_log(log_file_1, sim_meta_1) + assert winner_1 == "Bob" + + +class TestBattleCode24Results: + """Tests for BattleCode24Arena.get_results()""" + + @pytest.fixture + def arena(self, tmp_log_dir, minimal_config): + """Create BattleCode24Arena instance.""" + config = minimal_config.copy() + config["game"]["name"] = "BattleCode24" + config["game"]["sims_per_round"] = 10 + config["players"] = [ + {"name": "p1", "agent": "dummy"}, + {"name": "p2", "agent": "dummy"}, + ] + arena = BattleCode24Arena.__new__(BattleCode24Arena) + arena.submission = "src/mysubmission" + arena.log_local = tmp_log_dir + arena.config = config + arena.logger = MagicMock() + return arena + + def _create_round_log_dir(self, tmp_log_dir, round_num): + """Helper to create round log directory.""" + round_dir = tmp_log_dir / "rounds" / str(round_num) + round_dir.mkdir(parents=True, exist_ok=True) + return round_dir + + def _create_sim_log(self, round_dir, idx: int, winner_team: str, is_tie: bool = False): + """Helper to create a simulation log file.""" + log_file = round_dir / f"sim_{idx}.log" + if is_tie: + content = ( + f"[server] mysubmission ({winner_team}) wins (1234)\n" + "Reason: The winning team won arbitrarily (coin flip).\n" + ) + else: + content = f"[server] mysubmission ({winner_team}) wins (1234)\nReason: Team won by capturing flags.\n" + log_file.write_text(content) + + def test_get_results_clear_winner(self, arena, tmp_log_dir): + """Test get_results when one player clearly wins.""" + round_dir = self._create_round_log_dir(tmp_log_dir, 1) + + # Create simulations with Alice winning 7, Bob winning 3 + simulations = [] + for idx in range(10): + if idx < 7: + # Alice as team A wins + self._create_sim_log(round_dir, idx, "A") + simulations.append(SimulationMeta(idx=idx, team_a="Alice", team_b="Bob", log_file=f"sim_{idx}.log")) + else: + # Bob as team B wins + self._create_sim_log(round_dir, idx, "B") + simulations.append(SimulationMeta(idx=idx, team_a="Alice", team_b="Bob", log_file=f"sim_{idx}.log")) + + arena._round_result = RoundResult(status="completed", simulations=simulations) + + agents = [MockPlayer("Alice"), MockPlayer("Bob")] + stats = RoundStats(round_num=1, agents=agents) + + arena.get_results(agents, round_num=1, stats=stats) + + assert stats.winner == "Alice" + assert stats.scores["Alice"] == 7 + assert 
stats.scores["Bob"] == 3 + assert stats.player_stats["Alice"].score == 7 + assert stats.player_stats["Bob"].score == 3 + + def test_get_results_with_ties(self, arena, tmp_log_dir): + """Test get_results when some simulations end in ties.""" + round_dir = self._create_round_log_dir(tmp_log_dir, 1) + + simulations = [] + # Alice wins 4, Bob wins 3, 3 ties + for idx in range(10): + if idx < 4: + self._create_sim_log(round_dir, idx, "A") + simulations.append(SimulationMeta(idx=idx, team_a="Alice", team_b="Bob", log_file=f"sim_{idx}.log")) + elif idx < 7: + self._create_sim_log(round_dir, idx, "B") + simulations.append(SimulationMeta(idx=idx, team_a="Alice", team_b="Bob", log_file=f"sim_{idx}.log")) + else: + self._create_sim_log(round_dir, idx, "A", is_tie=True) + simulations.append(SimulationMeta(idx=idx, team_a="Alice", team_b="Bob", log_file=f"sim_{idx}.log")) + + arena._round_result = RoundResult(status="completed", simulations=simulations) + + agents = [MockPlayer("Alice"), MockPlayer("Bob")] + stats = RoundStats(round_num=1, agents=agents) + + arena.get_results(agents, round_num=1, stats=stats) + + assert stats.winner == "Alice" + assert stats.scores["Alice"] == 4 + assert stats.scores["Bob"] == 3 + + def test_get_results_equal_scores(self, arena, tmp_log_dir): + """Test get_results when both players have equal scores.""" + round_dir = self._create_round_log_dir(tmp_log_dir, 1) + + simulations = [] + # Alice wins 5, Bob wins 5 + for idx in range(10): + if idx < 5: + self._create_sim_log(round_dir, idx, "A") + simulations.append(SimulationMeta(idx=idx, team_a="Alice", team_b="Bob", log_file=f"sim_{idx}.log")) + else: + self._create_sim_log(round_dir, idx, "B") + simulations.append(SimulationMeta(idx=idx, team_a="Alice", team_b="Bob", log_file=f"sim_{idx}.log")) + + arena._round_result = RoundResult(status="completed", simulations=simulations) + + agents = [MockPlayer("Alice"), MockPlayer("Bob")] + stats = RoundStats(round_num=1, agents=agents) + + arena.get_results(agents, round_num=1, stats=stats) + + assert stats.winner == RESULT_TIE + assert stats.scores["Alice"] == 5 + assert stats.scores["Bob"] == 5 + + def test_get_results_auto_win(self, arena, tmp_log_dir): + """Test get_results when one player wins due to opponent's compilation failure.""" + arena._round_result = RoundResult( + status="auto_win", + winner="Alice", + loser="Bob", + reason="Bob failed to compile", + ) + + agents = [MockPlayer("Alice"), MockPlayer("Bob")] + stats = RoundStats(round_num=1, agents=agents) + + arena.get_results(agents, round_num=1, stats=stats) + + assert stats.winner == "Alice" + assert stats.scores["Alice"] == 10 # Gets all sims_per_round points + assert stats.player_stats["Alice"].score == 10 + assert stats.player_stats["Bob"].valid_submit is False + assert "Compilation failed" in stats.player_stats["Bob"].invalid_reason + + def test_get_results_no_contest(self, arena, tmp_log_dir): + """Test get_results when both players fail to compile.""" + arena._round_result = RoundResult( + status="no_contest", + reason="all agents failed to compile", + ) + + agents = [MockPlayer("Alice"), MockPlayer("Bob")] + stats = RoundStats(round_num=1, agents=agents) + + arena.get_results(agents, round_num=1, stats=stats) + + assert stats.winner == RESULT_TIE + assert stats.scores["Alice"] == 5 # Split points evenly + assert stats.scores["Bob"] == 5 + assert stats.player_stats["Alice"].valid_submit is False + assert stats.player_stats["Bob"].valid_submit is False + assert "Compilation failed" in 
stats.player_stats["Alice"].invalid_reason + assert "Compilation failed" in stats.player_stats["Bob"].invalid_reason + + def test_get_results_all_simulations_failed(self, arena, tmp_log_dir): + """Test get_results when all simulations fail to produce results.""" + # Create simulations but no log files (all failed) + simulations = [] + for idx in range(10): + simulations.append(SimulationMeta(idx=idx, team_a="Alice", team_b="Bob", log_file=f"sim_{idx}.log")) + + arena._round_result = RoundResult(status="completed", simulations=simulations) + + agents = [MockPlayer("Alice"), MockPlayer("Bob")] + stats = RoundStats(round_num=1, agents=agents) + + arena.get_results(agents, round_num=1, stats=stats) + + assert stats.winner == RESULT_TIE + # No scores recorded when all simulations fail + assert stats.scores["Alice"] == 0 + assert stats.scores["Bob"] == 0 + + def test_get_results_missing_round_result(self, arena, tmp_log_dir): + """Test get_results when execute_round didn't set _round_result.""" + arena._round_result = None + + agents = [MockPlayer("Alice"), MockPlayer("Bob")] + stats = RoundStats(round_num=1, agents=agents) + + arena.get_results(agents, round_num=1, stats=stats) + + assert stats.winner == RESULT_TIE + + +class TestBattleCode24Config: + """Tests for BattleCode24Arena configuration and properties.""" + + def test_arena_name(self): + """Test that arena has correct name.""" + assert BattleCode24Arena.name == "BattleCode24" + + def test_submission_path(self): + """Test that submission path is correct.""" + assert BattleCode24Arena.submission == "src/mysubmission" + + def test_default_args(self): + """Test default arena arguments.""" + assert BattleCode24Arena.default_args["maps"] == "DefaultSmall" + + def test_initialization_with_two_players(self, tmp_log_dir, minimal_config): + """Test arena initialization with exactly 2 players.""" + config = minimal_config.copy() + config["game"]["name"] = "BattleCode24" + config["game"]["args"] = {"maps": "CustomMap"} + config["players"] = [ + {"name": "p1", "agent": "dummy"}, + {"name": "p2", "agent": "dummy"}, + ] + + # Mock the parent __init__ since we're testing a partial initialization + with patch.object(BattleCode24Arena.__bases__[0], "__init__", return_value=None): + arena = BattleCode24Arena.__new__(BattleCode24Arena) + arena.config = config + arena._round_result = None + + # Verify run command is built correctly + arena.run_cmd_base = "./gradlew --no-daemon run" + for arg, val in config["game"]["args"].items(): + if isinstance(val, bool): + if val: + arena.run_cmd_base += f" -P{arg}=true" + else: + arena.run_cmd_base += f" -P{arg}={val}" + + assert "-Pmaps=CustomMap" in arena.run_cmd_base + + def test_initialization_fails_with_wrong_player_count(self, tmp_log_dir, minimal_config): + """Test that initialization fails with != 2 players.""" + config = minimal_config.copy() + config["game"]["name"] = "BattleCode24" + config["players"] = [{"name": "p1", "agent": "dummy"}] # Only 1 player + + with pytest.raises(AssertionError, match="two-player game"): + with patch.object(BattleCode24Arena.__bases__[0], "__init__", return_value=None): + BattleCode24Arena(config, tournament_id="test", local_output_dir=tmp_log_dir) + + +class TestBattleCode24SimulationMetadata: + """Tests for SimulationMeta dataclass and team position alternation.""" + + def test_simulation_meta_creation(self): + """Test creating SimulationMeta with team assignments.""" + meta = SimulationMeta(idx=0, team_a="Alice", team_b="Bob", log_file="sim_0.log") + + assert meta.idx == 0 + 
assert meta.team_a == "Alice" + assert meta.team_b == "Bob" + assert meta.log_file == "sim_0.log" + + def test_team_position_alternation_pattern(self): + """Test that team positions alternate correctly across simulations.""" + agents = ["Alice", "Bob"] + simulations = [] + + for idx in range(6): + if idx % 2 == 0: + team_a, team_b = agents[0], agents[1] + else: + team_a, team_b = agents[1], agents[0] + + simulations.append(SimulationMeta(idx=idx, team_a=team_a, team_b=team_b, log_file=f"sim_{idx}.log")) + + # Verify alternation pattern + assert simulations[0].team_a == "Alice" and simulations[0].team_b == "Bob" + assert simulations[1].team_a == "Bob" and simulations[1].team_b == "Alice" + assert simulations[2].team_a == "Alice" and simulations[2].team_b == "Bob" + assert simulations[3].team_a == "Bob" and simulations[3].team_b == "Alice" + assert simulations[4].team_a == "Alice" and simulations[4].team_b == "Bob" + assert simulations[5].team_a == "Bob" and simulations[5].team_b == "Alice" + + +class TestBattleCode24RoundResult: + """Tests for RoundResult dataclass.""" + + def test_round_result_completed(self): + """Test RoundResult for completed round.""" + simulations = [ + SimulationMeta(idx=0, team_a="Alice", team_b="Bob", log_file="sim_0.log"), + SimulationMeta(idx=1, team_a="Bob", team_b="Alice", log_file="sim_1.log"), + ] + result = RoundResult(status="completed", simulations=simulations) + + assert result.status == "completed" + assert result.winner is None + assert result.loser is None + assert result.reason == "" + assert len(result.simulations) == 2 + + def test_round_result_auto_win(self): + """Test RoundResult for auto-win scenario.""" + result = RoundResult( + status="auto_win", + winner="Alice", + loser="Bob", + reason="Bob failed to compile", + ) + + assert result.status == "auto_win" + assert result.winner == "Alice" + assert result.loser == "Bob" + assert "compile" in result.reason + assert len(result.simulations) == 0 + + def test_round_result_no_contest(self): + """Test RoundResult for no-contest scenario.""" + result = RoundResult(status="no_contest", reason="all agents failed to compile") + + assert result.status == "no_contest" + assert result.winner is None + assert result.loser is None + assert "all agents failed" in result.reason + assert len(result.simulations) == 0