From 959e1452b95d515518e8d97f3b4e86fab9399833 Mon Sep 17 00:00:00 2001 From: Elliot Fosong Date: Tue, 17 May 2022 19:35:28 +0100 Subject: [PATCH 1/6] add PettingZoo environment for LBF --- lbforaging.py | 2 +- lbforaging/__init__.py | 25 - lbforaging/agents/hba.py | 2 +- lbforaging/agents/heuristic_agent.py | 4 +- lbforaging/agents/monte_carlo.py | 2 +- lbforaging/agents/nn_agent.py | 2 +- lbforaging/agents/q_agent.py | 4 +- lbforaging/agents/random_agent.py | 2 +- lbforaging/foraging/__init__.py | 1 - lbforaging/gym_env/__init__.py | 26 + lbforaging/{foraging => gym_env}/agent.py | 0 .../{foraging => gym_env}/environment.py | 0 .../{foraging => gym_env}/icons/agent.png | Bin .../{foraging => gym_env}/icons/apple.png | Bin lbforaging/{foraging => gym_env}/rendering.py | 0 lbforaging/petting_zoo/__init__.py | 1 + lbforaging/petting_zoo/environment.py | 523 ++++++++++++++++++ lbforaging/petting_zoo/icons/agent.png | Bin 0 -> 3398 bytes lbforaging/petting_zoo/icons/apple.png | Bin 0 -> 1694 bytes lbforaging/petting_zoo/rendering.py | 245 ++++++++ setup.py | 2 +- tests/petting_zoo_test.py | 28 + tests/test_env.py | 6 +- 23 files changed, 836 insertions(+), 39 deletions(-) delete mode 100644 lbforaging/foraging/__init__.py create mode 100644 lbforaging/gym_env/__init__.py rename lbforaging/{foraging => gym_env}/agent.py (100%) rename lbforaging/{foraging => gym_env}/environment.py (100%) rename lbforaging/{foraging => gym_env}/icons/agent.png (100%) rename lbforaging/{foraging => gym_env}/icons/apple.png (100%) rename lbforaging/{foraging => gym_env}/rendering.py (100%) create mode 100644 lbforaging/petting_zoo/__init__.py create mode 100644 lbforaging/petting_zoo/environment.py create mode 100644 lbforaging/petting_zoo/icons/agent.png create mode 100644 lbforaging/petting_zoo/icons/apple.png create mode 100644 lbforaging/petting_zoo/rendering.py create mode 100644 tests/petting_zoo_test.py diff --git a/lbforaging.py b/lbforaging.py index 037e9d7..070fc81 100644 --- a/lbforaging.py +++ b/lbforaging.py @@ -4,7 +4,7 @@ import time import gym import numpy as np -import lbforaging +import lbforaging.gym_env logger = logging.getLogger(__name__) diff --git a/lbforaging/__init__.py b/lbforaging/__init__.py index a469f31..e69de29 100644 --- a/lbforaging/__init__.py +++ b/lbforaging/__init__.py @@ -1,25 +0,0 @@ -from gym.envs.registration import registry, register, make, spec -from itertools import product - -sizes = range(5, 20) -players = range(2, 20) -foods = range(1, 10) -coop = [True, False] -grid_observation = [True, False] - -for s, p, f, c, grid_obs in product(sizes, players, foods, coop, grid_observation): - for sight in range(1, s + 1): - register( - id="Foraging{5}{4}-{0}x{0}-{1}p-{2}f{3}-v2".format(s, p, f, "-coop" if c else "", "" if sight == s else f"-{sight}s", "-grid" if grid_obs else ""), - entry_point="lbforaging.foraging:ForagingEnv", - kwargs={ - "players": p, - "max_player_level": 3, - "field_size": (s, s), - "max_food": f, - "sight": sight, - "max_episode_steps": 50, - "force_coop": c, - "grid_observation": grid_obs, - }, - ) diff --git a/lbforaging/agents/hba.py b/lbforaging/agents/hba.py index 6a74c38..19bb872 100644 --- a/lbforaging/agents/hba.py +++ b/lbforaging/agents/hba.py @@ -1,5 +1,5 @@ from . 
import QAgent -from foraging import Env +from gym_env import Env import random import numpy as np from agents import H1, H2, H3, H4 diff --git a/lbforaging/agents/heuristic_agent.py b/lbforaging/agents/heuristic_agent.py index dd12689..464e317 100644 --- a/lbforaging/agents/heuristic_agent.py +++ b/lbforaging/agents/heuristic_agent.py @@ -1,7 +1,7 @@ import random import numpy as np -from foraging import Agent -from foraging.environment import Action +from gym_env import Agent +from gym_env.environment import Action class HeuristicAgent(Agent): diff --git a/lbforaging/agents/monte_carlo.py b/lbforaging/agents/monte_carlo.py index eb99505..d3f04ca 100644 --- a/lbforaging/agents/monte_carlo.py +++ b/lbforaging/agents/monte_carlo.py @@ -8,7 +8,7 @@ import plotly.graph_objs as go from networkx.drawing.nx_pydot import graphviz_layout -from foraging import Agent, Env +from gym_env import Agent, Env MCTS_DEPTH = 15 diff --git a/lbforaging/agents/nn_agent.py b/lbforaging/agents/nn_agent.py index 59b516c..7905191 100644 --- a/lbforaging/agents/nn_agent.py +++ b/lbforaging/agents/nn_agent.py @@ -1,6 +1,6 @@ import random -from foraging import Agent +from gym_env import Agent class NNAgent(Agent): diff --git a/lbforaging/agents/q_agent.py b/lbforaging/agents/q_agent.py index 5d6e631..5e0ad2d 100644 --- a/lbforaging/agents/q_agent.py +++ b/lbforaging/agents/q_agent.py @@ -5,8 +5,8 @@ import pandas as pd from agents import H1 -from lbforaging import Agent, Env -from lbforaging.environment import Action +from gym_env import Agent, Env +from gym_env.environment import Action _CACHE = None diff --git a/lbforaging/agents/random_agent.py b/lbforaging/agents/random_agent.py index fa136f3..a323131 100644 --- a/lbforaging/agents/random_agent.py +++ b/lbforaging/agents/random_agent.py @@ -1,6 +1,6 @@ import random -from lbforaging import Agent +from gym_env import Agent class RandomAgent(Agent): diff --git a/lbforaging/foraging/__init__.py b/lbforaging/foraging/__init__.py deleted file mode 100644 index 0fbbd18..0000000 --- a/lbforaging/foraging/__init__.py +++ /dev/null @@ -1 +0,0 @@ -from lbforaging.foraging.environment import ForagingEnv diff --git a/lbforaging/gym_env/__init__.py b/lbforaging/gym_env/__init__.py new file mode 100644 index 0000000..85b0d19 --- /dev/null +++ b/lbforaging/gym_env/__init__.py @@ -0,0 +1,26 @@ +from lbforaging.gym_env.environment import ForagingEnv +from gym.envs.registration import registry, register, make, spec +from itertools import product + +sizes = range(5, 20) +players = range(2, 20) +foods = range(1, 10) +coop = [True, False] +grid_observation = [True, False] + +for s, p, f, c, grid_obs in product(sizes, players, foods, coop, grid_observation): + for sight in range(1, s + 1): + register( + id="Foraging{5}{4}-{0}x{0}-{1}p-{2}f{3}-v2".format(s, p, f, "-coop" if c else "", "" if sight == s else f"-{sight}s", "-grid" if grid_obs else ""), + entry_point="lbforaging.gym_env:ForagingEnv", + kwargs={ + "players": p, + "max_player_level": 3, + "field_size": (s, s), + "max_food": f, + "sight": sight, + "max_episode_steps": 50, + "force_coop": c, + "grid_observation": grid_obs, + }, + ) diff --git a/lbforaging/foraging/agent.py b/lbforaging/gym_env/agent.py similarity index 100% rename from lbforaging/foraging/agent.py rename to lbforaging/gym_env/agent.py diff --git a/lbforaging/foraging/environment.py b/lbforaging/gym_env/environment.py similarity index 100% rename from lbforaging/foraging/environment.py rename to lbforaging/gym_env/environment.py diff --git
a/lbforaging/foraging/icons/agent.png b/lbforaging/gym_env/icons/agent.png similarity index 100% rename from lbforaging/foraging/icons/agent.png rename to lbforaging/gym_env/icons/agent.png diff --git a/lbforaging/foraging/icons/apple.png b/lbforaging/gym_env/icons/apple.png similarity index 100% rename from lbforaging/foraging/icons/apple.png rename to lbforaging/gym_env/icons/apple.png diff --git a/lbforaging/foraging/rendering.py b/lbforaging/gym_env/rendering.py similarity index 100% rename from lbforaging/foraging/rendering.py rename to lbforaging/gym_env/rendering.py diff --git a/lbforaging/petting_zoo/__init__.py b/lbforaging/petting_zoo/__init__.py new file mode 100644 index 0000000..90dab35 --- /dev/null +++ b/lbforaging/petting_zoo/__init__.py @@ -0,0 +1 @@ +from .environment import env, parallel_env diff --git a/lbforaging/petting_zoo/environment.py b/lbforaging/petting_zoo/environment.py new file mode 100644 index 0000000..54c9b6f --- /dev/null +++ b/lbforaging/petting_zoo/environment.py @@ -0,0 +1,523 @@ +import functools +import logging +from collections import defaultdict +from copy import copy +from enum import Enum +import gym +from gym.utils import seeding +import numpy as np +from pettingzoo import ParallelEnv +from pettingzoo.utils import wrappers +from pettingzoo.utils import parallel_to_aec +from PIL import ImageColor + + +class Action(Enum): + NONE = 0 + NORTH = 1 + SOUTH = 2 + WEST = 3 + EAST = 4 + LOAD = 5 + + +class CellEntity(Enum): + # entity encodings for grid observations + OUT_OF_BOUNDS = 0 + EMPTY = 1 + FOOD = 2 + AGENT = 3 + +def env(**kwargs): + env = raw_env(**kwargs) + env = wrappers.AssertOutOfBoundsWrapper(env) + env = wrappers.OrderEnforcingWrapper(env) + return env + +def parallel_env(**kwargs): + env = ForagingEnvLite(**kwargs) + return env + +def raw_env(**kwargs): + env = parallel_env(**kwargs) + env = parallel_to_aec(env) + return env + +class ForagingEnvLite(ParallelEnv): + """ + A class that contains rules/actions for the game level-based foraging. 
+ """ + + metadata = { + "name": "lbforaging_v2", + "render_modes": ["human"], + "render_fps": 4, + } + + action_set = [Action.NORTH, Action.SOUTH, Action.WEST, Action.EAST, Action.LOAD] + def __init__( + self, + n_players=2, + max_player_level=3, + field_size=(8,8), + max_food=3, + sight=8, + max_cycles=50, + force_coop=False, + player_levels=[], + food_levels=[], + agent_colors=[], + normalize_reward=True, + grid_observation=False, + penalty=0.0, + ): + # TODO sight = None, etc + self.logger = logging.getLogger(__name__) + self.seed() + + self.possible_agents = [f"player_{i}" for i in range(n_players)] + self.agent_name_mapping = {name: i for i, name in enumerate(self.possible_agents)} + self.agents = [] + self.pos = {} + self.specified_agent_levels = defaultdict(lambda: None) + for i, level in enumerate(player_levels): + if i >= n_players: + break + self.specified_agent_levels[self.possible_agents[i]] = level + self.agent_levels = {} + # TODO set agent colors + self.agent_colors = defaultdict(lambda: (0, 0, 0)) + for i, agent_color in enumerate(agent_colors): + if i >= n_players: + break + if isinstance(agent_color, list) or isinstance(agent_color, tuple): + self.agent_colors[self.possible_agents[i]] = agent_color + else: + self.agent_colors[self.possible_agents[i]] = ImageColor.getrgb(agent_color) + + + self.field = np.zeros(field_size, np.int32) + + self.penalty = penalty + self.max_food = max_food + self.specified_food_levels = [None] * self.max_food + self.specified_food_levels[:len(food_levels)] = food_levels + self._food_spawned = 0.0 + self.max_agent_level = max_player_level + self.sight = sight + self.force_coop = force_coop + self._game_over = None + + self._rendering_initialized = False + self._valid_actions = None + self._max_cycles = max_cycles + + self._normalize_reward = normalize_reward + self._grid_observation = grid_observation + + self.viewer = None + + def seed(self, seed=None): + self.np_random, seed = seeding.np_random(seed) + return [seed] + + @functools.lru_cache(maxsize=None) + def observation_space(self, agent): + """The Observation Space for each agent. 
- all of the board (board_size^2) with foods + - player description (x, y, level)*player_count + """ + if not self._grid_observation: + field_x = self.field.shape[1] + field_y = self.field.shape[0] + # field_size = field_x * field_y + + max_food = self.max_food + max_food_level = self.max_agent_level * len(self.possible_agents) + + min_obs_food = [-1, -1, 0] + max_obs_food = [field_x-1, field_y-1, max_food_level] + min_obs_agents = [-1, -1, 0] + max_obs_agents = [field_x-1, field_y-1, self.max_agent_level] + + min_obs = min_obs_food * max_food + min_obs_agents * len(self.possible_agents) + max_obs = max_obs_food * max_food + max_obs_agents * len(self.possible_agents) + else: + # grid observation space + grid_shape = (1 + 2 * self.sight, 1 + 2 * self.sight) + + # agents layer: agent levels + agents_min = np.zeros(grid_shape, dtype=np.float32) + agents_max = np.ones(grid_shape, dtype=np.float32) * self.max_agent_level + + # foods layer: food levels + max_food_level = self.max_agent_level * len(self.possible_agents) + foods_min = np.zeros(grid_shape, dtype=np.float32) + foods_max = np.ones(grid_shape, dtype=np.float32) * max_food_level + + # access layer: is the cell accessible + access_min = np.zeros(grid_shape, dtype=np.float32) + access_max = np.ones(grid_shape, dtype=np.float32) + + # total layer + min_obs = np.stack([agents_min, foods_min, access_min]) + max_obs = np.stack([agents_max, foods_max, access_max]) + return gym.spaces.Box(np.array(min_obs), np.array(max_obs), dtype=np.float32) + + @functools.lru_cache(maxsize=None) + def action_space(self, agent): + return gym.spaces.Discrete(6) + + @property + def field_size(self): + return self.field.shape + + @property + def field_length(self): + return self.field.size + + @property + def rows(self): + return self.field_size[0] + + @property + def cols(self): + return self.field_size[1] + + @property + def game_over(self): + return self._game_over + + def _gen_valid_moves(self): + self._valid_actions = { + agent: [ + action for action in Action if self._is_valid_action(agent, action) + ] + for agent in self.agents + } + + def _action_mask(self, agent): + return np.array([ + 1 if Action(i) in self._valid_actions[agent] else 0 + for i in range(self.action_space(agent).n) + ], dtype=np.int8) + + def neighborhood(self, row, col, distance=1, ignore_diag=False): + if not ignore_diag: + return self.field[ + max(row - distance, 0) : min(row + distance + 1, self.rows), + max(col - distance, 0) : min(col + distance + 1, self.cols), + ] + + return ( + self.field[ + max(row - distance, 0) : min(row + distance + 1, self.rows), col + ].sum() + + self.field[ + row, max(col - distance, 0) : min(col + distance + 1, self.cols) + ].sum() + ) + + def adjacent_food(self, row, col): + return ( + self.field[max(row - 1, 0), col] + + self.field[min(row + 1, self.rows - 1), col] + + self.field[row, max(col - 1, 0)] + + self.field[row, min(col + 1, self.cols - 1)] + ) + + def adjacent_food_location(self, row, col): + if row > 0 and self.field[row - 1, col] > 0: + return row - 1, col + elif row < self.rows - 1 and self.field[row + 1, col] > 0: + return row + 1, col + elif col > 0 and self.field[row, col - 1] > 0: + return row, col - 1 + elif col < self.cols - 1 and self.field[row, col + 1] > 0: + return row, col + 1 + + def adjacent_agents(self, row, col): + return [agent + for agent in self.agents + if abs(self.pos[agent][0] - row) == 1 + and self.pos[agent][1] == col + or abs(self.pos[agent][1] - col) == 1 + and self.pos[agent][0] == row + ] + + def
spawn_food(self, max_level): + attempts = 0 + min_level = max_level if self.force_coop else 1 + for food_level in self.specified_food_levels: + while attempts < 1000: + attempts += 1 + row = self.np_random.randint(1, self.rows - 1) + col = self.np_random.randint(1, self.cols - 1) + + # check if it has neighbors: + if ( + self.neighborhood(row, col).sum() > 0 + or self.neighborhood(row, col, distance=2, ignore_diag=True) > 0 + or not self._is_empty_location(row, col) + ): + continue + + self.field[row, col] = (food_level + if food_level is not None + else self.np_random.randint(min_level, max_level+1) + ) + break + self._food_spawned = self.field.sum() + + def _is_empty_location(self, row, col): + if self.field[row, col] != 0: + return False + for pos in self.pos.values(): + if pos[0] == row and pos[1] == col: + return False + return True + + def spawn_agents(self, max_agent_level): + possible_indices = np.arange(self.field_length)[self.field.flatten()==0] + num_agents_to_spawn = len(self.agents) + spawn_indices = self.np_random.choice(possible_indices, + size=num_agents_to_spawn, + replace=False) + unraveled_indices = np.unravel_index(spawn_indices, shape=self.field_size) + unraveled_indices = list(zip(*unraveled_indices)) + for i, agent in enumerate(self.agents): + self.pos[agent] = unraveled_indices[i] + if self.specified_agent_levels[agent] is None: + self.agent_levels[agent] = self.np_random.randint(1, max_agent_level + 1) + else: + self.agent_levels[agent] = min(self.specified_agent_levels[agent], max_agent_level) + + def _is_valid_action(self, agent, action): + if action == Action.NONE: + return True + row_pos = self.pos[agent][0] + col_pos = self.pos[agent][1] + row_pos, col_pos = self.pos[agent] + row_pos_min, col_pos_min = (0, 0) + row_pos_max, col_pos_max = (self.rows-1, self.cols-1) + if action == Action.NORTH: + return ( + row_pos > row_pos_min + and self.field[row_pos-1, col_pos] == 0 + ) + if action == Action.SOUTH: + return ( + row_pos < row_pos_max + and self.field[row_pos+1, col_pos] == 0 + ) + if action == Action.WEST: + return ( + col_pos > col_pos_min + and self.field[row_pos, col_pos-1] == 0 + ) + if action == Action.EAST: + return ( + col_pos < col_pos_max + and self.field[row_pos, col_pos+1] == 0 + ) + if action == Action.LOAD: + return self.adjacent_food(*self.pos[agent]) > 0 + + self.logger.error("Undefined action {} from {}".format(action, agent)) + raise ValueError("Undefined action") + + def _transform_to_neighborhood(self, center, sight, position): + return ( + position[0] - center[0] + min(sight, center[0]), + position[1] - center[1] + min(sight, center[1]), + ) + + def get_valid_actions(self, agent): + # TODO + return self._valid_actions[agent] + + def reset(self, seed=None): + if seed is not None: + self.seed(seed=seed) + self.field = np.zeros(self.field_size, np.int32) + self.agents = copy(self.possible_agents) + self.spawn_agents(self.max_agent_level) + self.spawn_food( + max_level=sum(self.agent_levels.values()) + ) + self.current_step = 0 + self._game_over = False + self._gen_valid_moves() + + observations = {agent: self.observe(agent) for agent in self.agents} + return observations + + def step(self, actions): + self.current_step += 1 + + rewards = {agent: 0.0 for agent in self.agents} + actions = {agent: (Action(a) if Action(a) in self._valid_actions[agent] else Action.NONE) + for agent, a in actions.items()} + + loading_agents = set() + # move agents + # if two or more agents try to move to the same location they all fail + collisions = 
defaultdict(list) + + # so check for collisions + for agent, action in actions.items(): + if action == Action.NONE: + collisions[tuple(self.pos[agent])].append(agent) + elif action == Action.NORTH: + collisions[(self.pos[agent][0] - 1, self.pos[agent][1])].append(agent) + elif action == Action.SOUTH: + collisions[(self.pos[agent][0] + 1, self.pos[agent][1])].append(agent) + elif action == Action.WEST: + collisions[(self.pos[agent][0], self.pos[agent][1] - 1)].append(agent) + elif action == Action.EAST: + collisions[(self.pos[agent][0], self.pos[agent][1] + 1)].append(agent) + elif action == Action.LOAD: + collisions[tuple(self.pos[agent])].append(agent) + loading_agents.add(agent) + + # and do movements for non-colliding agents + for pos, agents in collisions.items(): + if len(agents) > 1: # make sure no more than one agent arrives at a location + continue + self.pos[agents[0]] = pos + + # finally process the loadings: + while loading_agents: + # find adjacent food + agent = loading_agents.pop() + frow, fcol = self.adjacent_food_location(*self.pos[agent]) + food = self.field[frow, fcol] + + adj_agents = self.adjacent_agents(frow, fcol) + adj_agents = [ + a for a in adj_agents if a in loading_agents or a is agent + ] + + adj_agent_level = sum([self.agent_levels[a] for a in adj_agents]) + + loading_agents = loading_agents - set(adj_agents) + + if adj_agent_level < food: + # failed to load + for a in adj_agents: + rewards[a] -= self.penalty + continue + + # else the food was loaded and each agent scores points + for a in adj_agents: + rewards[a] = float(self.agent_levels[a] * food) + if self._normalize_reward: + rewards[a] = rewards[a] / float( + adj_agent_level * self._food_spawned + ) # normalize reward + # and the food is removed + self.field[frow, fcol] = 0 + + # TODO when pettingzoo distinguishes between 'done' and 'out of steps' will need to update + self._game_over = ( + self.field.sum() == 0 or self._max_cycles <= self.current_step + ) + dones = {agent: self._game_over for agent in self.agents} + + observations = {agent: self.observe(agent) for agent in self.agents} + + self._gen_valid_moves() + infos = {agent: {"action_mask": self._action_mask(agent)} + for agent in self.agents} + self.agents = [agent for agent in self.agents if not dones[agent]] + return observations, rewards, dones, infos + + def _get_global_grid_layers(self): + grid_shape_x, grid_shape_y = self.field_size + grid_shape_x += 2 * self.sight + grid_shape_y += 2 * self.sight + grid_shape = (grid_shape_x, grid_shape_y) + + # Agents layer: level & position of agents + agents_layer = np.zeros(grid_shape, dtype=np.float32) + for agent in self.agents: + row, col = self.pos[agent] + agents_layer[self.sight + row, self.sight + col] = self.agent_levels[agent] + + # Foods layer: level & position of foods + foods_layer = np.zeros(grid_shape, dtype=np.float32) + foods_layer[self.sight:-self.sight, self.sight:-self.sight] = self.field.copy() + + # Access layer: 1 if grid cells are accessible + access_layer = np.ones(grid_shape, dtype=np.float32) + # out of bounds not accessible + access_layer[:self.sight, :] = 0.0 + access_layer[-self.sight:, :] = 0.0 + access_layer[:, :self.sight] = 0.0 + access_layer[:, -self.sight:] = 0.0 + # agent locations are not accessible + for agent in self.agents: + row, col = self.pos[agent] + access_layer[self.sight + row, self.sight + col] = 0.0 + # food locations are not accessible + for row, col in zip(*self.field.nonzero()): + access_layer[self.sight + row, self.sight + col] = 0.0 + + return
np.stack([agents_layer, foods_layer, access_layer]) + + def _get_grid_obs(self, agent): + global_grid_layers = self._get_global_grid_layers() + row, col = self.pos[agent] + start_row, end_row = row, row + 2*self.sight+1 + start_col, end_col = col, col + 2*self.sight+1 + return global_grid_layers[:, start_row:end_row, start_col:end_col] + + def _get_array_obs(self, agent): + obs = np.zeros(self.observation_space(agent).shape, dtype=np.float32) + local_field = self.neighborhood(*self.pos[agent], distance=self.sight) + obs[:3*self.max_food] = np.tile([-1, -1, 0], reps=self.max_food) + for i, (row, col) in enumerate(zip(*np.nonzero(local_field))): + obs[(3*i):(3*i+3)] = [row, col, local_field[row, col]] + + obs[3*self.max_food:] = np.tile([-1, -1, 0], reps=len(self.possible_agents)) + # self agent is always first + ordered_agents = [agent] + [a for a in self.possible_agents if a != agent] + for i, other_agent in enumerate(ordered_agents): + relative_pos = self._transform_to_neighborhood(self.pos[agent], + self.sight, + self.pos[other_agent]) + if self._in_sight(relative_pos): + idx = 3*self.max_food + 3*i + obs[idx:idx+3] = [*relative_pos, self.agent_levels[other_agent]] + return obs + + def _in_sight(self, relative_pos): + lower_bound = np.array([0, 0]) + upper_bound = np.array([2*self.sight, 2*self.sight]) + rpos = np.array(relative_pos) + return np.any((lower_bound < rpos) & (rpos < upper_bound)) + + def observe(self, agent): + if self._grid_observation: + obs = self._get_grid_obs(agent) + else: + obs = self._get_array_obs(agent) + assert self.observation_space(agent).contains(obs), \ + f"obs space error: obs: {obs}, obs_space: {self.observation_space(agent)}" + return obs + + def _init_render(self): + from .rendering import Viewer + + self.viewer = Viewer((self.rows, self.cols)) + self._rendering_initialized = True + + def render(self, mode="human"): + if not self._rendering_initialized: + self._init_render() + + return self.viewer.render(self, return_rgb_array=mode == "rgb_array") + + def close(self): + if self.viewer: + self.viewer.close() diff --git a/lbforaging/petting_zoo/icons/agent.png b/lbforaging/petting_zoo/icons/agent.png new file mode 100644 index 0000000000000000000000000000000000000000..dcf173f3bab3b91464dfcfff9efa28ccd1d01007 GIT binary patch literal 3398 zcmV-M4Y~4(P) zaB^>EX>4U6ba`-PAZ2)IW&i+q+O=70lJh7G{I65&5zHHo!?S8{u*ct)upOMqbH3e4 zB{&!hsat9wL;at>2mOUV<837CLiEY|4SzP-q%$t!Hvf`*s?Ex${Mr2$zF+$3yrI!$ z(3<%*tf%%RG)pW=Q;$KB9=<1lD=W{r!sI*EQg`_=9Vr9LlwieYuN=-Jl8 z<#>CmvG;}DJA9|L!v>(#940baccaWEpK*F%_^nMw>+q>ez&L#3TAN5{oH;@QAeZot zcH35fE+8LQbdUadbqBuAd%wBE((f4I;^7VD9)m9sFB6;7BYnLATH23r_V*Yqj?rgy z*?J)A_ly#-)nNt$3m`Z0n8z#N?Qk9BmGFvN-YO_>Tips?RVAgt${=M`Q&|nwrLC&M zMXS38CtB;Ojr2uulyo)BljWBhI+$ddiQug2PR5{HpS~!WH_3ceMa_RMImF}1vQ`nM&2kg8zN3Cs}z=B-Q<#i?6v z8R6MroI^WHNKt6J0YHSg9h^}H3~XK*r@Y#N)ao&0FgJ+S; z!2}yz@F9d4QpnLn8(s7<#28b|NhbphbjhcXVoE7zO&Jqq*vc56(NlCW#THk52_=?P zaz*-7TV3@v)L2u^O*hjZ6V12KVoNP|MIB1fZFk-G&|^W|kKtf`$fEllyaaI%J83gIxJSWX;s2FA2iU_3AbNNCNRZBbdPnK|ZcOBzMtG@NwI z4K>A#fkNArc0--q-I)7`H*)bK-t;@>h^hN8m?Kd4leY(~HD2ecFgB}@G2INR5BGb_ zq+{kqEx&7r-A43oY_*K80NOwRV6=@<%;=R=clcgPNTrO>$G{x$R>9jonlHVKEVJ_5 zU8NL8&mQAy_R{BD#t2P3=eA{iX-$PRYd}%orS=K|nJGJZw#Gql+&RhAYN<9z(5|gj=m?iGtnf>=}Ps3<@ZV zL54*w1`9DTY;lFfZl4jbw)A{JX>d-ZAV5u+GB7_tVu&1jG(n;PkofBbCWFjDKtEY{ z-Vz>SUVebV8=50W1dLmX)cX`^2!l9DqGVj)^z|@?q7y*j9eRxGvYT?sx)&|I8=$(R zU6~nGYm*3k2U^ADg77D#^d&jqfU|=UEPaiE)*yKPO&oFCw=JC=0nq-=1~hUQ@u)W= zJQ-aA%B9UdCB=9R;JN?wODBH#Pu~Ub+<*Emfafi}2|zRmInV0Ed8Dg{RM=S!7DWK3 
zqN#b%z#=eAfHuO%UU`RJOdGl3FybKeYOH2Tx|)fh6=>xciC{(-Xp@*h(VBv|vtwW0 z*TvG(ej#&1Aub`*avuVwyG4-J2&jcJb&myr@yNivXx40}fVRS#G+2UM4_M6-q!stK zmMV%6CFh3T{v?lmRqP&5nosz(BBj} z&Y**s8@pF-VRR)I33x{@WaN_=c|Vq0WiJXWXyaNEcy1|) z;BOVtiVw7x1Y;4Eo4b**iBdQn#k{jJcX7Z)N_b|x1BpP#a8me2A#S@);S7OF*~A(j z0{N^zl}9X2d$7i%CA7MZqn`tL+9w{ZHtsy92d=;HL(=!C?FnI(U5lktRN9fh6T@^> zmG~~7=w&`iK7s8IB!*sTOhb2D!GFjXf{vV!q62g95oH^)It~}sLTKgYx4-z!V2?S` zG7W$~CVK17Odn?qXoq;!6l>2k zT;Ke;zR4;gsGMuKu`qc&=8}=!)j^DB~{AM$AYw3#zR6ov;OU)dF&@8!g@8fT}wAKr+Fq7Kt z$uWc8oHO`T%zd0*DG68msN!THIIPj{5c%0YGW4s|q+4#v2K42ooPS&%KhO4ZmxiC! zwE(b(FE(dFOFHv$bx&wX=c`51-yB?Dv{yq1M~THZIb?O)f8>bmbn7L@; z0@8AXIXBVk7I3a0%O)`G>p8FM3pey9=c1Jv`j2!?47r=AuczsCySa$jlWi}7&=2>m zuUVzJTra;F4BmetFa8BT6Lb>+p3(yV00D$)LqkwWLqi~Na&Km7Y-Iodc$|HaJxIeq z9K~N-iy{>jJBV}$Se-10ia2T&iclfc3avVrT>1q~8j=(jN5Qq=;KyRs!Nplu2UkH5 z`~Y!ua#D1W691PJTEuv8+>dwn9(V5mf4$69vtt}kHOojP;zB07Ds;Xgh<@}Tf?ttSBkO=fq$1yloC^;7d1ly1r{;;n#6qEs;lu(9+7_Ay9CQ`H?_wWxnevw=D^OF;9 zQYa1tUu^qh1nAra8a3PgKDO<~2@rS&uC$iFQU_)}Nw2lE=n>Gn4P0EeGEhp?gm000JJOGiWi{{a60|De66lK=n!32;bRa{vGVy8r+Iy8$}v&^HdtCQYE49wHWp3%XdV|iLKo(G z&%NiKuk;7Ax^QNG&v|Fgyzk5na*{d>>;ZNFYk>ivAGiSg1e^zs0&hmdICYUas(xxZ zom3yMy79H@5!=&;>ZU51zh9j)8_lY_t7QI3b>3`sS$(<+=C`W9+Zp|>?kcnSLG`?y z)kSq7J({a3RxpLI&SNOQr%iGKRk{cY?e=GD71T?5pcf#WP*X=z8Zz~@Qg z^l!Ts*mOnPnn7y)ZIUp}v<*P*0zLrlsn{3zZD3DC*lsqszk=og9s$PMiU4&L@IA1m zqWiG3z{ZGJXf*V}U`_oA~1sq=L clE#1OAM|wQR~1x-Q2+n{07*qoM6N<$f=_!#BLDyZ literal 0 HcmV?d00001 diff --git a/lbforaging/petting_zoo/icons/apple.png b/lbforaging/petting_zoo/icons/apple.png new file mode 100644 index 0000000000000000000000000000000000000000..bd27726fa425a41c05e8bcfed846ef050c3c32d1 GIT binary patch literal 1694 zcmX|C3pA8z7#_-HQxsxVw%RzEW=8YRf0_ARXPAjG$YnA{y10j_Y>75&Gl?#fF4R`W zbg{W471q|Klm-20HI$pcQC zWKL*u@YdMm)QD{{gw)hj$M`LY+oK|qVjQ<_i<1tyEFci{F9!JXIa5*BO8X>nN%Ky5>ifUY7 z6^6Cb={y>FHAITZN0lh*15*I-U%5g}r*&YM5=T@F773^ZDbC~=yafvD+7 zI|i#5C=VhjaV%Pj#>h}#8rcsb0YP7g6kC9ST%Z8_z$zJl46Ep9vYDE|*03ZuQb+RZ3X@NEhu(k~q zs0$Q8Iz!7;>yE&JlxRTET}R7=$bcZCIZiV{t2z-R9HM|beoNPJVyD$`MB53V0a-A# z0(9N4@N}X7)pW;iI@(UAd(id|#z>9Cs68X z4mBTz@;Ou~hbm)J&$6lKJfR{CD)xX%JfKpHR>p!Vn6zql+Fx$88aiC#O1tSwt8;gGY;!-Z}tmscE8W6exBC=&+DZh_mv;_ZGi7!px+06!0?*D;gFya@Ct%Q zL-`*=`D4O0pTa`M*9vlGm-vAXRhAs&A575G)6>_VX<%q%WMVdVp1HZjd@JjPi)?>b zYEN82B2lOij9_#pcTYCQi|fPl^A8LP5eUQAZ`cqS6SsNG)})l3yVBBkXJ%$)?~_O* zhmRaRcIs5VRCeaf*>i;#ip$EcR#aA2{ZW1GM$N6-+B%U4Zf$Em8L-wGlh+H9=E9w-EvV9w@zBJa!hw3KHjpUU#bE2MocA>V^-mi#X zxT~HsJ{VrepTG48w@sT87RvMa*&Uc1i@uJ-+IwrtjYNts~x@q#Wzs{Cw{$M(Jvev;9lrpR_9*{O|m9WYwGcGmD$b z-J93u_Zy`}ac>K5$EnLJE37?7BqQSD#ybC^Wp=xX-N-v{`%ORR{!-}B9)D(ebxock zXd5kled;EIps@*igP9(r{qjBM&;oh3XX5Ov9PvZDS<;!8g_f8}*mehzX|)HDY#BC# zC|Xkem8e)8)@wO1=Wv;Iv9P*~s`} lOSjb5WjU&4J;$?--LZR~;C`*hN3H!ia5?_$D;^v7{0HXc&+h;L literal 0 HcmV?d00001 diff --git a/lbforaging/petting_zoo/rendering.py b/lbforaging/petting_zoo/rendering.py new file mode 100644 index 0000000..f32f4dd --- /dev/null +++ b/lbforaging/petting_zoo/rendering.py @@ -0,0 +1,245 @@ +""" +2D rendering of the level based foraging domain +""" + +import math +import os +import sys + +import numpy as np +import math +import six +from gym import error + +if "Apple" in sys.version: + if "DYLD_FALLBACK_LIBRARY_PATH" in os.environ: + os.environ["DYLD_FALLBACK_LIBRARY_PATH"] += ":/usr/lib" + # (JDS 2016/04/15): avoid bug on Anaconda 2.3.0 / Yosemite + + +try: + import pyglet +except ImportError as e: + raise ImportError( + """ + Cannot import pyglet. + HINT: you can install pyglet directly via 'pip install pyglet'. 
But if you really just want to install all Gym dependencies and not have to think about it, + 'pip install -e .[all]' or 'pip install gym[all]' will do it. + """ + ) + +try: + from pyglet.gl import * +except ImportError as e: + raise ImportError( + """ + Error occurred while running `from pyglet.gl import *` + HINT: make sure you have OpenGL installed. On Ubuntu, you can run 'apt-get install python-opengl'. + If you're running on a server, you may need a virtual frame buffer; something like this should work: + 'xvfb-run -s \"-screen 0 1400x900x24\" python ' + """ + ) + + +RAD2DEG = 57.29577951308232 +# Define some colors +_BLACK = (0, 0, 0) +_WHITE = (255, 255, 255) +_GREEN = (0, 255, 0) +_RED = (255, 0, 0) + +_BACKGROUND_COLOR = _WHITE +_GRID_COLOR = _BLACK + + +def get_display(spec): + """Convert a display specification (such as :0) into an actual Display + object. + Pyglet only supports multiple Displays on Linux. + """ + if spec is None: + return None + elif isinstance(spec, six.string_types): + return pyglet.canvas.Display(spec) + else: + raise error.Error( + "Invalid display specification: {}. (Must be a string like :0 or None.)".format( + spec + ) + ) + + +class Viewer(object): + def __init__(self, world_size): + display = get_display(None) + self.rows, self.cols = world_size + + self.grid_size = 50 + self.icon_size = 20 + + self.width = 1 + self.cols * (self.grid_size + 1) + self.height = 1 + self.rows * (self.grid_size + 1) + self.window = pyglet.window.Window( + width=self.width, height=self.height, display=display + ) + self.window.on_close = self.window_closed_by_user + self.isopen = True + + glEnable(GL_BLEND) + glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA) + + script_dir = os.path.dirname(__file__) + + pyglet.resource.path = [os.path.join(script_dir, "icons")] + pyglet.resource.reindex() + + self.img_apple = pyglet.resource.image("apple.png") + self.img_agent = pyglet.resource.image("agent.png") + + def close(self): + self.window.close() + + def window_closed_by_user(self): + self.isopen = False + exit() + + def set_bounds(self, left, right, bottom, top): + assert right > left and top > bottom + scalex = self.width / (right - left) + scaley = self.height / (top - bottom) + self.transform = Transform( + translation=(-left * scalex, -bottom * scaley), scale=(scalex, scaley) + ) + + def render(self, env, return_rgb_array=False): + glClearColor(*_WHITE, 0) + self.window.clear() + self.window.switch_to() + self.window.dispatch_events() + + self._draw_grid() + self._draw_food(env) + self._draw_players(env) + + if return_rgb_array: + buffer = pyglet.image.get_buffer_manager().get_color_buffer() + image_data = buffer.get_image_data() + arr = np.frombuffer(image_data.get_data(), dtype=np.uint8) + arr = arr.reshape(buffer.height, buffer.width, 4) + arr = arr[::-1, :, 0:3] + self.window.flip() + return arr if return_rgb_array else self.isopen + + def _draw_grid(self): + batch = pyglet.graphics.Batch() + # horizontal lines + for r in range(self.rows + 1): + batch.add( + 2, + gl.GL_LINES, + None, + ( + "v2f", + ( + 0, # LEFT X + (self.grid_size + 1) * r + 1, # Y + (self.grid_size + 1) * self.cols, # RIGHT X + (self.grid_size + 1) * r + 1, # Y + ), + ), + ("c3B", (*_BLACK, *_BLACK)), + ) + + # vertical lines + for c in range(self.cols + 1): + batch.add( + 2, + gl.GL_LINES, + None, + ( + "v2f", + ( + (self.grid_size + 1) * c + 1, # X + 0, # BOTTOM Y + (self.grid_size + 1) * c + 1, # X + (self.grid_size + 1) * self.rows, # TOP Y + ), + ), + ("c3B", (*_BLACK, *_BLACK)), + ) +
batch.draw() + + def _draw_food(self, env): + idxes = list(zip(*env.field.nonzero())) + apples = [] + batch = pyglet.graphics.Batch() + + # print(env.field) + for row, col in idxes: + apples.append( + pyglet.sprite.Sprite( + self.img_apple, + (self.grid_size + 1) * col, + self.height - (self.grid_size + 1) * (row + 1), + batch=batch, + ) + ) + for a in apples: + a.update(scale=self.grid_size / a.width) + batch.draw() + + for row, col in idxes: + self._draw_badge(row, col, env.field[row, col]) + + def _draw_players(self, env): + agents = [] + batch = pyglet.graphics.Batch() + + for agent in env.agents: + row, col = env.pos[agent] + sprite = pyglet.sprite.Sprite( + self.img_agent, + (self.grid_size + 1) * col, + self.height - (self.grid_size + 1) * (row + 1), + batch=batch, + ) + sprite.color = env.agent_colors[agent] + agents.append(sprite) + for agent_sprite in agents: + agent_sprite.update(scale=self.grid_size / agent_sprite.width) + batch.draw() + for agent in env.agents: + self._draw_badge(*env.pos[agent], env.agent_levels[agent]) + + def _draw_badge(self, row, col, level): + resolution = 6 + radius = self.grid_size / 5 + + badge_x = col * (self.grid_size + 1) + (3 / 4) * (self.grid_size + 1) + badge_y = self.height - (self.grid_size + 1) * (row + 1) + (1 / 4) * (self.grid_size + 1) + + # make a circle + verts = [] + for i in range(resolution): + angle = 2 * math.pi * i / resolution + x = radius * math.cos(angle) + badge_x + y = radius * math.sin(angle) + badge_y + verts += [x, y] + circle = pyglet.graphics.vertex_list(resolution, ("v2f", verts)) + glColor3ub(*_WHITE) + circle.draw(GL_POLYGON) + glColor3ub(*_BLACK) + circle.draw(GL_LINE_LOOP) + label = pyglet.text.Label( + str(level), + font_name="Times New Roman", + font_size=12, + bold=True, + x=badge_x, + y=badge_y + 2, + anchor_x="center", + anchor_y="center", + color=(*_BLACK, 255), + ) + label.draw() diff --git a/setup.py b/setup.py index c91d310..d4ff15d 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", ], - install_requires=["numpy", "gym>=0.12", "pyglet"], + install_requires=["numpy", "gym>=0.12", "pyglet", "PettingZoo>=1.17.0"], extras_require={"test": ["pytest"]}, include_package_data=True, ) diff --git a/tests/petting_zoo_test.py b/tests/petting_zoo_test.py new file mode 100644 index 0000000..7d203f2 --- /dev/null +++ b/tests/petting_zoo_test.py @@ -0,0 +1,27 @@ +import pytest +import pettingzoo.test as pzt +import lbforaging.petting_zoo as lbf +import supersuit as ss + +def test_max_cycles(): + pzt.max_cycles_test(lbf) + +def test_seed(): + pzt.seed_test(lbf.env) + +def test_api(): + env = lbf.env() + pzt.api_test(env, num_cycles=1000) + +def test_parallel_api(): + p_env = lbf.parallel_env() + pzt.parallel_api_test(p_env, num_cycles=1000) + +def test_vec_env(): + p_env = lbf.parallel_env() + mve = ss.pettingzoo_env_to_vec_env_v1(p_env) + +def test_concat_vec_env(): + p_env = lbf.parallel_env() + mve = ss.pettingzoo_env_to_vec_env_v1(p_env) + cve = ss.concat_vec_envs_v1(mve, 4) diff --git a/tests/test_env.py b/tests/test_env.py index e58c0f9..4556327 100644 --- a/tests/test_env.py +++ b/tests/test_env.py @@ -1,7 +1,7 @@ import pytest import numpy as np -import lbforaging -from lbforaging.foraging.environment import Action +import lbforaging.gym_env +from lbforaging.gym_env.environment import Action import gym @@ -166,4 +166,4 @@ def test_partial_obs_3(simple2p1f):
obs, _, _, _ = env.step([Action.WEST, Action.NONE]) assert obs[0][-2] > -1 - assert obs[1][-2] > -1 \ No newline at end of file + assert obs[1][-2] > -1 From c8671e223a0ad2915c178d2cbc012f63c68dc52a Mon Sep 17 00:00:00 2001 From: Elliot Fosong Date: Thu, 30 Jun 2022 21:16:37 +0100 Subject: [PATCH 2/6] Update for new API --- lbforaging/petting_zoo/environment.py | 16 ++++++++++++++-- setup.py | 2 +- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/lbforaging/petting_zoo/environment.py b/lbforaging/petting_zoo/environment.py index 54c9b6f..3c50246 100644 --- a/lbforaging/petting_zoo/environment.py +++ b/lbforaging/petting_zoo/environment.py @@ -50,7 +50,7 @@ class ForagingEnvLite(ParallelEnv): metadata = { "name": "lbforaging_v2", - "render_modes": ["human"], + "render_modes": ["human", "rgb_array"], "render_fps": 4, } @@ -164,10 +164,18 @@ def observation_space(self, agent): max_obs = np.stack([agents_max, foods_max, access_max]) return gym.spaces.Box(np.array(min_obs), np.array(max_obs), dtype=np.float32) + @property + def observation_spaces(self): + return {agent: self.observation_space(agent) for agent in self.possible_agents} + @functools.lru_cache(maxsize=None) def action_space(self, agent): return gym.spaces.Discrete(6) + @property + def action_spaces(self): + return {agent: self.action_space(agent) for agent in self.possible_agents} + @property def field_size(self): return self.field.shape @@ -336,7 +344,7 @@ def get_valid_actions(self, agent): # TODO return self._valid_actions[agent] - def reset(self, seed=None): + def reset(self, seed=None, return_info=False, options=None): if seed is not None: self.seed(seed=seed) self.field = np.zeros(self.field_size, np.int32) @@ -350,6 +358,10 @@ def reset(self, seed=None): self._gen_valid_moves() observations = {agent: self.observe(agent) for agent in self.agents} + if return_info: + infos = {agent: {"action_mask": self._action_mask(agent)} + for agent in self.agents} + return observations, infos return observations def step(self, actions): diff --git a/setup.py b/setup.py index d4ff15d..fde4469 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", ], - install_requires=["numpy", "gym>=0.12", "pyglet", "PettingZoo>=1.17.0"], + install_requires=["numpy", "gym>=0.22.0", "pyglet", "pettingzoo>=1.17.0"], extras_require={"test": ["pytest"]}, include_package_data=True, ) From c59d7db44cbb7ca4e6972236ec7405c74679aa63 Mon Sep 17 00:00:00 2001 From: Elliot Fosong Date: Fri, 1 Jul 2022 10:28:39 +0100 Subject: [PATCH 3/6] Enable headless mode --- lbforaging/petting_zoo/rendering.py | 1 + 1 file changed, 1 insertion(+) diff --git a/lbforaging/petting_zoo/rendering.py b/lbforaging/petting_zoo/rendering.py index f32f4dd..5549172 100644 --- a/lbforaging/petting_zoo/rendering.py +++ b/lbforaging/petting_zoo/rendering.py @@ -19,6 +19,7 @@ try: import pyglet + pyglet.options["headless"] = True except ImportError as e: raise ImportError( """ From 5b5ed596a84eb1660b49876c965bca7feec5c791 Mon Sep 17 00:00:00 2001 From: Elliot Fosong Date: Tue, 19 Jul 2022 00:33:36 +0100 Subject: [PATCH 4/6] Add termination reason info --- lbforaging/petting_zoo/environment.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/lbforaging/petting_zoo/environment.py b/lbforaging/petting_zoo/environment.py index 3c50246..d02bf1c 100644 --- a/lbforaging/petting_zoo/environment.py +++ b/lbforaging/petting_zoo/environment.py @@ -355,11 +355,15 @@ def 
reset(self, seed=None, return_info=False, options=None): ) self.current_step = 0 self._game_over = False + self.terminated = False + self.truncated = False self._gen_valid_moves() observations = {agent: self.observe(agent) for agent in self.agents} if return_info: - infos = {agent: {"action_mask": self._action_mask(agent)} + infos = {agent: {"action_mask": self._action_mask(agent), + "terminated": self.terminated, + "truncated": self.truncated,} for agent in self.agents} return observations, infos return observations @@ -430,16 +434,18 @@ def step(self, actions): # and the food is removed self.field[frow, fcol] = 0 - # TODO when pettingzoo distinguishes between 'done' and 'out of steps' will need to update - self._game_over = ( - self.field.sum() == 0 or self._max_cycles <= self.current_step - ) + # TODO when pettingzoo distinguishes between 'done' and 'terminated/truncated' will need to update + self.terminated = self.field.sum() == 0 + self.truncated = self._max_cycles <= self.current_step + self._game_over = self.terminated or self.truncated dones = {agent: self._game_over for agent in self.agents} observations = {agent: self.observe(agent) for agent in self.agents} self._gen_valid_moves() - infos = {agent: {"action_mask": self._action_mask(agent)} + infos = {agent: {"action_mask": self._action_mask(agent), + "terminated": self.terminated, + "truncated": self.truncated,} for agent in self.agents} self.agents = [agent for agent in self.agents if not dones[agent]] return observations, rewards, dones, infos From 2144be89bff74b076e6f30525519408146f6be5f Mon Sep 17 00:00:00 2001 From: Elliot Fosong Date: Mon, 3 Oct 2022 23:37:42 +0100 Subject: [PATCH 5/6] Update dependencies --- lbforaging/petting_zoo/environment.py | 22 ++++++++++++---------- setup.py | 2 +- 2 files changed, 13 insertions(+), 11 deletions(-) diff --git a/lbforaging/petting_zoo/environment.py b/lbforaging/petting_zoo/environment.py index d02bf1c..7d4e309 100644 --- a/lbforaging/petting_zoo/environment.py +++ b/lbforaging/petting_zoo/environment.py @@ -70,6 +70,7 @@ def __init__( normalize_reward=True, grid_observation=False, penalty=0.0, + render_mode="rgb_array" ): # TODO sight = None, etc self.logger = logging.getLogger(__name__) @@ -116,6 +117,7 @@ def __init__( self._grid_observation = grid_observation self.viewer = None + self.render_mode = render_mode def seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) @@ -259,8 +261,8 @@ def spawn_food(self, max_level): for food_level in self.specified_food_levels: while attempts < 1000: attempts += 1 - row = self.np_random.randint(1, self.rows - 1) - col = self.np_random.randint(1, self.cols - 1) + row = self.np_random.integers(1, self.rows - 1) + col = self.np_random.integers(1, self.cols - 1) # check if it has neighbors: if ( @@ -272,7 +274,7 @@ def spawn_food(self, max_level): self.field[row, col] = (food_level if food_level is not None - else self.np_random.randint(min_level, max_level+1) + else self.np_random.integers(min_level, max_level+1) ) break self._food_spawned = self.field.sum() @@ -296,7 +298,7 @@ def spawn_agents(self, max_agent_level): for i, agent in enumerate(self.agents): self.pos[agent] = unraveled_indices[i] if self.specified_agent_levels[agent] is None: - self.agent_levels[agent] = self.np_random.randint(1, max_agent_level + 1) + self.agent_levels[agent] = self.np_random.integers(1, max_agent_level + 1) else: self.agent_levels[agent] = min(self.specified_agent_levels[agent], max_agent_level) @@ -361,9 +363,7 @@ def reset(self, seed=None,
return_info=False, options=None): observations = {agent: self.observe(agent) for agent in self.agents} if return_info: - infos = {agent: {"action_mask": self._action_mask(agent), - "terminated": self.terminated, - "truncated": self.truncated,} + infos = {agent: {"action_mask": self._action_mask(agent)} for agent in self.agents} return observations, infos return observations @@ -437,6 +437,8 @@ def step(self, actions): # TODO when pettingzoo distinguishes between 'done' and 'terminated/truncated' will need to update self.terminated = self.field.sum() == 0 self.truncated = self._max_cycles <= self.current_step + terminated = {agent: self.terminated for agent in self.agents} + truncated = {agent: self.truncated for agent in self.agents} self._game_over = self.terminated or self.truncated dones = {agent: self._game_over for agent in self.agents} @@ -448,7 +450,7 @@ def step(self, actions): "truncated": self.truncated,} for agent in self.agents} self.agents = [agent for agent in self.agents if not dones[agent]] - return observations, rewards, dones, infos + return observations, rewards, terminated, truncated, infos def _get_global_grid_layers(self): grid_shape_x, grid_shape_y = self.field_size @@ -530,11 +532,11 @@ def _init_render(self): self.viewer = Viewer((self.rows, self.cols)) self._rendering_initialized = True - def render(self, mode="human"): + def render(self): if not self._rendering_initialized: self._init_render() - return self.viewer.render(self, return_rgb_array=mode == "rgb_array") + return self.viewer.render(self, return_rgb_array=(self.render_mode=="rgb_array")) def close(self): if self.viewer: diff --git a/setup.py b/setup.py index fde4469..16f2553 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", ], - install_requires=["numpy", "gym>=0.22.0", "pyglet", "pettingzoo>=1.17.0"], + install_requires=["numpy>=1.23.1", "gym>=0.26.1", "pyglet", "pettingzoo>=1.21.0"], extras_require={"test": ["pytest"]}, include_package_data=True, ) From dcf653d295d7a6e8a7c499c0b2ec6d22fe363073 Mon Sep 17 00:00:00 2001 From: Elliot Fosong Date: Mon, 13 Nov 2023 17:38:20 +0100 Subject: [PATCH 6/6] Update gym and add simple render mode --- lbforaging/petting_zoo/environment.py | 39 +++++---- lbforaging/petting_zoo/simple_render.py | 107 ++++++++++++++++++++++++ setup.py | 15 +++- 3 files changed, 142 insertions(+), 19 deletions(-) create mode 100644 lbforaging/petting_zoo/simple_render.py diff --git a/lbforaging/petting_zoo/environment.py b/lbforaging/petting_zoo/environment.py index 7d4e309..37c21dc 100644 --- a/lbforaging/petting_zoo/environment.py +++ b/lbforaging/petting_zoo/environment.py @@ -3,8 +3,8 @@ from collections import defaultdict from copy import copy from enum import Enum -import gym -from gym.utils import seeding +import gymnasium +from gymnasium.utils import seeding import numpy as np from pettingzoo import ParallelEnv from pettingzoo.utils import wrappers @@ -70,7 +70,8 @@ def __init__( normalize_reward=True, grid_observation=False, penalty=0.0, - render_mode="rgb_array" + render_mode="rgb_array", + render_style="simple", ): # TODO sight = None, etc self.logger = logging.getLogger(__name__) self.seed() @@ -118,6 +119,7 @@ def __init__( self.viewer = None self.render_mode = render_mode + self.render_style = render_style def seed(self, seed=None): self.np_random, seed = seeding.np_random(seed) @@ -138,9 +140,9 @@ def observation_space(self, agent): max_food_level = self.max_agent_level * len(self.possible_agents)
min_obs_food = [-1, -1, 0] - max_obs_food = [field_x-1, field_y-1, max_food_level] + max_obs_food = [field_y-1, field_x-1, max_food_level] min_obs_agents = [-1, -1, 0] - max_obs_agents = [field_x-1, field_y-1, self.max_agent_level] + max_obs_agents = [field_y-1, field_x-1, self.max_agent_level] min_obs = min_obs_food * max_food + min_obs_agents * len(self.possible_agents) max_obs = max_obs_food * max_food + max_obs_agents * len(self.possible_agents) @@ -164,7 +166,7 @@ def observation_space(self, agent): # total layer min_obs = np.stack([agents_min, foods_min, access_min]) max_obs = np.stack([agents_max, foods_max, access_max]) - return gym.spaces.Box(np.array(min_obs), np.array(max_obs), dtype=np.float32) + return gymnasium.spaces.Box(np.array(min_obs), np.array(max_obs), dtype=np.float32) @property def observation_spaces(self): @@ -172,7 +174,7 @@ def observation_spaces(self): @functools.lru_cache(maxsize=None) def action_space(self, agent): - return gym.spaces.Discrete(6) + return gymnasium.spaces.Discrete(6) @property def action_spaces(self): @@ -362,11 +364,9 @@ def reset(self, seed=None, return_info=False, options=None): self._gen_valid_moves() observations = {agent: self.observe(agent) for agent in self.agents} - if return_info: - infos = {agent: {"action_mask": self._action_mask(agent)} - for agent in self.agents} - return observations, infos - return observations + infos = {agent: {"action_mask": self._action_mask(agent)} + for agent in self.agents} + return observations, infos def step(self, actions): self.current_step += 1 @@ -527,16 +527,21 @@ def observe(self, agent): return obs def _init_render(self): - from .rendering import Viewer - - self.viewer = Viewer((self.rows, self.cols)) + if self.render_style == "full": + from .rendering import Viewer + self.viewer = Viewer((self.rows, self.cols)) + elif self.render_style == "simple": + from .simple_render import render + self.simple_render = render self._rendering_initialized = True def render(self): if not self._rendering_initialized: self._init_render() - - return self.viewer.render(self, return_rgb_array=(self.render_mode=="rgb_array")) + if self.render_style == "full": + return self.viewer.render(self, return_rgb_array=(self.render_mode=="rgb_array")) + elif self.render_style == "simple": + return self.simple_render(self) def close(self): if self.viewer: diff --git a/lbforaging/petting_zoo/simple_render.py b/lbforaging/petting_zoo/simple_render.py new file mode 100644 index 0000000..88f337a --- /dev/null +++ b/lbforaging/petting_zoo/simple_render.py @@ -0,0 +1,107 @@ +import numpy as np +from PIL import Image + +PIXEL_SCALE = 3 +IMG_SCALE = 24 + +BASE_COLOUR = (0,0,255) +CHEQUER_V = 230 + +FOOD_BASE = ( 0,255,255) +FOOD_RING = ( 0,128,255) +FOOD_LVLS = ( 0,255,204) + +AGENT_BASE = (140,255,255) +AGENT_RING = (140,128,255) +AGENT_LVLS = (140,255,204) + +ring_points = ( + [(r, PIXEL_SCALE-1) for r in range(PIXEL_SCALE-1)] + + [(PIXEL_SCALE-1, c) for c in reversed(range(1,PIXEL_SCALE))] + + [(r, 0 ) for r in reversed(range(1,PIXEL_SCALE))] + + [(0, c ) for c in range(PIXEL_SCALE-1)] +) + +def _pixel_to_slice(pixel): + return slice(pixel*PIXEL_SCALE, (pixel+1)*PIXEL_SCALE) + + +def _color_to_arr(color): + return np.expand_dims(np.array(color, dtype=np.uint8), (1,2)) + + +def _food_pixel(lvl): + """Builds a food sprite of given level `lvl`.""" + pixel = np.tile( + _color_to_arr(FOOD_BASE), + (PIXEL_SCALE, PIXEL_SCALE) + ) + #draw the level indicator ring: + pixel[:, 0,:] = _color_to_arr(FOOD_RING).reshape(3,1) + pixel[:,-1,:] = 
_color_to_arr(FOOD_RING).reshape(3,1) + pixel[:,:, 0] = _color_to_arr(FOOD_RING).reshape(3,1) + pixel[:,:,-1] = _color_to_arr(FOOD_RING).reshape(3,1) + ring_start = PIXEL_SCALE//2 + for l in range(lvl): + point = ring_points[(l+ring_start)%len(ring_points)] + pixel[(slice(None), point[1], point[0])] = _color_to_arr(FOOD_LVLS).squeeze() + return pixel + + +def _agent_pixel(lvl): + """Builds an agent sprite of given level `lvl`.""" + pixel = np.tile( + _color_to_arr(AGENT_BASE), + (PIXEL_SCALE, PIXEL_SCALE) + ) + #draw the level indicator ring: + pixel[:, 0,:] = _color_to_arr(AGENT_RING).reshape(3,1) + pixel[:,-1,:] = _color_to_arr(AGENT_RING).reshape(3,1) + pixel[:,:, 0] = _color_to_arr(AGENT_RING).reshape(3,1) + pixel[:,:,-1] = _color_to_arr(AGENT_RING).reshape(3,1) + ring_start = PIXEL_SCALE//2 + for l in range(lvl): + point = ring_points[(l+ring_start)%len(ring_points)] + pixel[(slice(None), point[1], point[0])] = _color_to_arr(AGENT_LVLS).squeeze() + return pixel + + +def render(env): + """Renders the environment.""" + base_pixel = np.tile( + _color_to_arr(BASE_COLOUR), + (PIXEL_SCALE, PIXEL_SCALE) + ) + field_size = env.field_size + img = np.tile(base_pixel, field_size) + # chequer + for y in range(field_size[0]): + for x in range(field_size[1]): + if (x-y)%2 == 0: + r = _pixel_to_slice(y) + c = _pixel_to_slice(x) + img[2,r,c] = CHEQUER_V + + # Food + for (y,x) in zip(*np.nonzero(env.field)): + r = _pixel_to_slice(y) + c = _pixel_to_slice(x) + l = env.field[y,x] + img[:,r,c] = _food_pixel(l) + + # Agents + for agent, pos in env.pos.items(): + r = _pixel_to_slice(pos[0]) + c = _pixel_to_slice(pos[1]) + l = env.agent_levels[agent] + img[:,r,c] = _agent_pixel(l) + + rgb_image = Image.fromarray( + img.transpose((1,2,0)), mode="HSV" + ).convert( + "RGB" + ).resize( + (IMG_SCALE*img.shape[2], IMG_SCALE*img.shape[1]), + resample=Image.Resampling.NEAREST, + ) + return np.asarray(rgb_image) diff --git a/setup.py b/setup.py index 16f2553..0a3d7f7 100644 --- a/setup.py +++ b/setup.py @@ -13,8 +13,19 @@ "Programming Language :: Python :: 3.5", "Programming Language :: Python :: 3.6", "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", ], - install_requires=["numpy>=1.23.1", "gym>=0.26.1", "pyglet", "pettingzoo>=1.21.0"], + install_requires=[ + "numpy>=1.23.1", + "gymnasium>=0.26.2", + "pettingzoo>=1.23.0", + "pillow", + ], - extras_require={"test": ["pytest"]}, + extras_require={ + "test": ["pytest"], + "full_render": ["pyglet<2"], + }, include_package_data=True, )
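
A minimal usage sketch of the parallel API introduced by this series, as it stands after PATCH 6/6: reset() returns (observations, infos) and step() returns the (observations, rewards, terminated, truncated, infos) five-tuple, with a per-agent "action_mask" entry in infos as implemented in environment.py. The constructor keywords are taken from ForagingEnvLite.__init__ in the patch; the random-policy rollout itself is illustrative only, not part of the patches.

import numpy as np
from lbforaging.petting_zoo import parallel_env

env = parallel_env(n_players=2, field_size=(8, 8), max_food=3, sight=8,
                   max_cycles=50, render_mode="rgb_array", render_style="simple")
observations, infos = env.reset(seed=42)
while env.agents:
    # sample uniformly among the actions the env currently marks valid
    actions = {
        agent: int(np.random.choice(np.flatnonzero(infos[agent]["action_mask"])))
        for agent in env.agents
    }
    observations, rewards, terminated, truncated, infos = env.step(actions)
frame = env.render()  # an RGB numpy array via simple_render.py when render_style="simple"
env.close()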