diff --git a/lbforaging.py b/lbforaging.py
index 037e9d7..070fc81 100644
--- a/lbforaging.py
+++ b/lbforaging.py
@@ -4,7 +4,7 @@
 import time
 
 import gym
 import numpy as np
-import lbforaging
+import lbforaging.gym_env
 
 logger = logging.getLogger(__name__)
diff --git a/lbforaging/__init__.py b/lbforaging/__init__.py
index a469f31..e69de29 100644
--- a/lbforaging/__init__.py
+++ b/lbforaging/__init__.py
@@ -1,25 +0,0 @@
-from gym.envs.registration import registry, register, make, spec
-from itertools import product
-
-sizes = range(5, 20)
-players = range(2, 20)
-foods = range(1, 10)
-coop = [True, False]
-grid_observation = [True, False]
-
-for s, p, f, c, grid_obs in product(sizes, players, foods, coop, grid_observation):
-    for sight in range(1, s + 1):
-        register(
-            id="Foraging{5}{4}-{0}x{0}-{1}p-{2}f{3}-v2".format(s, p, f, "-coop" if c else "", "" if sight == s else f"-{sight}s", "-grid" if grid_obs else ""),
-            entry_point="lbforaging.foraging:ForagingEnv",
-            kwargs={
-                "players": p,
-                "max_player_level": 3,
-                "field_size": (s, s),
-                "max_food": f,
-                "sight": sight,
-                "max_episode_steps": 50,
-                "force_coop": c,
-                "grid_observation": grid_obs,
-            },
-        )
diff --git a/lbforaging/agents/hba.py b/lbforaging/agents/hba.py
index 6a74c38..19bb872 100644
--- a/lbforaging/agents/hba.py
+++ b/lbforaging/agents/hba.py
@@ -1,5 +1,5 @@
 from . import QAgent
-from foraging import Env
+from lbforaging.gym_env import Env
 import random
 import numpy as np
 from agents import H1, H2, H3, H4
diff --git a/lbforaging/agents/heuristic_agent.py b/lbforaging/agents/heuristic_agent.py
index dd12689..464e317 100644
--- a/lbforaging/agents/heuristic_agent.py
+++ b/lbforaging/agents/heuristic_agent.py
@@ -1,7 +1,7 @@
 import random
 import numpy as np
 
-from foraging import Agent
-from foraging.environment import Action
+from lbforaging.gym_env import Agent
+from lbforaging.gym_env.environment import Action
 
 class HeuristicAgent(Agent):
diff --git a/lbforaging/agents/monte_carlo.py b/lbforaging/agents/monte_carlo.py
index eb99505..d3f04ca 100644
--- a/lbforaging/agents/monte_carlo.py
+++ b/lbforaging/agents/monte_carlo.py
@@ -8,7 +8,7 @@
 import plotly.graph_objs as go
 from networkx.drawing.nx_pydot import graphviz_layout
 
-from foraging import Agent, Env
+from lbforaging.gym_env import Agent, Env
 
 MCTS_DEPTH = 15
diff --git a/lbforaging/agents/nn_agent.py b/lbforaging/agents/nn_agent.py
index 59b516c..7905191 100644
--- a/lbforaging/agents/nn_agent.py
+++ b/lbforaging/agents/nn_agent.py
@@ -1,6 +1,6 @@
 import random
 
-from foraging import Agent
+from lbforaging.gym_env import Agent
 
 
 class NNAgent(Agent):
diff --git a/lbforaging/agents/q_agent.py b/lbforaging/agents/q_agent.py
index 5d6e631..5e0ad2d 100644
--- a/lbforaging/agents/q_agent.py
+++ b/lbforaging/agents/q_agent.py
@@ -5,8 +5,8 @@
 import pandas as pd
 
 from agents import H1
-from lbforaging import Agent, Env
-from lbforaging.environment import Action
+from lbforaging.gym_env import Agent, Env
+from lbforaging.gym_env.environment import Action
 
 _CACHE = None
diff --git a/lbforaging/agents/random_agent.py b/lbforaging/agents/random_agent.py
index fa136f3..a323131 100644
--- a/lbforaging/agents/random_agent.py
+++ b/lbforaging/agents/random_agent.py
@@ -1,6 +1,6 @@
 import random
 
-from lbforaging import Agent
+from lbforaging.gym_env import Agent
 
 
 class RandomAgent(Agent):
diff --git a/lbforaging/foraging/__init__.py b/lbforaging/foraging/__init__.py
deleted file mode 100644
index 0fbbd18..0000000
--- a/lbforaging/foraging/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-from lbforaging.foraging.environment import ForagingEnv
diff --git a/lbforaging/gym_env/__init__.py b/lbforaging/gym_env/__init__.py
new file mode 100644
index 0000000..85b0d19
--- /dev/null
+++ b/lbforaging/gym_env/__init__.py
@@ -0,0 +1,26 @@
+from lbforaging.gym_env.environment import ForagingEnv
+from gym.envs.registration import registry, register, make, spec
+from itertools import product
+
+sizes = range(5, 20)
+players = range(2, 20)
+foods = range(1, 10)
+coop = [True, False]
+grid_observation = [True, False]
+
+for s, p, f, c, grid_obs in product(sizes, players, foods, coop, grid_observation):
+    for sight in range(1, s + 1):
+        register(
+            id="Foraging{5}{4}-{0}x{0}-{1}p-{2}f{3}-v2".format(s, p, f, "-coop" if c else "", "" if sight == s else f"-{sight}s", "-grid" if grid_obs else ""),
+            entry_point="lbforaging.gym_env:ForagingEnv",
+            kwargs={
+                "players": p,
+                "max_player_level": 3,
+                "field_size": (s, s),
+                "max_food": f,
+                "sight": sight,
+                "max_episode_steps": 50,
+                "force_coop": c,
+                "grid_observation": grid_obs,
+            },
+        )
diff --git a/lbforaging/foraging/agent.py b/lbforaging/gym_env/agent.py
similarity index 100%
rename from lbforaging/foraging/agent.py
rename to lbforaging/gym_env/agent.py
diff --git a/lbforaging/foraging/environment.py b/lbforaging/gym_env/environment.py
similarity index 100%
rename from lbforaging/foraging/environment.py
rename to lbforaging/gym_env/environment.py
diff --git a/lbforaging/foraging/icons/agent.png b/lbforaging/gym_env/icons/agent.png
similarity index 100%
rename from lbforaging/foraging/icons/agent.png
rename to lbforaging/gym_env/icons/agent.png
diff --git a/lbforaging/foraging/icons/apple.png b/lbforaging/gym_env/icons/apple.png
similarity index 100%
rename from lbforaging/foraging/icons/apple.png
rename to lbforaging/gym_env/icons/apple.png
diff --git a/lbforaging/foraging/rendering.py b/lbforaging/gym_env/rendering.py
similarity index 100%
rename from lbforaging/foraging/rendering.py
rename to lbforaging/gym_env/rendering.py
diff --git a/lbforaging/petting_zoo/__init__.py b/lbforaging/petting_zoo/__init__.py
new file mode 100644
index 0000000..90dab35
--- /dev/null
+++ b/lbforaging/petting_zoo/__init__.py
@@ -0,0 +1 @@
+from .environment import env, parallel_env
diff --git a/lbforaging/petting_zoo/environment.py b/lbforaging/petting_zoo/environment.py
new file mode 100644
index 0000000..37c21dc
--- /dev/null
+++ b/lbforaging/petting_zoo/environment.py
@@ -0,0 +1,548 @@
+import functools
+import logging
+from collections import defaultdict
+from copy import copy
+from enum import Enum
+import gymnasium
+from gymnasium.utils import seeding
+import numpy as np
+from pettingzoo import ParallelEnv
+from pettingzoo.utils import wrappers
+from pettingzoo.utils import parallel_to_aec
+from PIL import ImageColor
+
+
+class Action(Enum):
+    NONE = 0
+    NORTH = 1
+    SOUTH = 2
+    WEST = 3
+    EAST = 4
+    LOAD = 5
+
+
+class CellEntity(Enum):
+    # entity encodings for grid observations
+    OUT_OF_BOUNDS = 0
+    EMPTY = 1
+    FOOD = 2
+    AGENT = 3
+
+
+def env(**kwargs):
+    env = raw_env(**kwargs)
+    env = wrappers.AssertOutOfBoundsWrapper(env)
+    env = wrappers.OrderEnforcingWrapper(env)
+    return env
+
+
+def parallel_env(**kwargs):
+    env = ForagingEnvLite(**kwargs)
+    return env
+
+
+def raw_env(**kwargs):
+    env = parallel_env(**kwargs)
+    env = parallel_to_aec(env)
+    return env
+
+
+class ForagingEnvLite(ParallelEnv):
+    """
+    A class that contains rules/actions for the game level-based foraging.
+ """ + + metadata = { + "name": "lbforaging_v2", + "render_modes": ["human", "rgb_array"], + "render_fps": 4, + } + + action_set = [Action.NORTH, Action.SOUTH, Action.WEST, Action.EAST, Action.LOAD] + def __init__( + self, + n_players=2, + max_player_level=3, + field_size=(8,8), + max_food=3, + sight=8, + max_cycles=50, + force_coop=False, + player_levels=[], + food_levels=[], + agent_colors=[], + normalize_reward=True, + grid_observation=False, + penalty=0.0, + render_mode="rgb_array", + render_style="simple", + ): + # TODO sight = None, etc + self.logger = logging.getLogger(__name__) + self.seed() + + self.possible_agents = [f"player_{i}" for i in range(n_players)] + self.agent_name_mapping = {name: i for i, name in enumerate(self.possible_agents)} + self.agents = [] + self.pos = {} + self.specified_agent_levels = defaultdict(lambda: None) + for i, level in enumerate(player_levels): + if i >= n_players: + break + self.specified_agent_levels[self.possible_agents[i]] = level + self.agent_levels = {} + # TODO set agent colors + self.agent_colors = defaultdict(lambda: (0, 0, 0)) + for i, agent_color in enumerate(agent_colors): + if i >= n_players: + break + if isinstance(agent_color, list) or isinstance(agent_color, tuple): + self.agent_colors[self.possible_agents[i]] = agent_color + else: + self.agent_colors[self.possible_agents[i]] = ImageColor.getrgb(agent_color) + + + self.field = np.zeros(field_size, np.int32) + + self.penalty = penalty + self.max_food = max_food + self.specified_food_levels = [None] * self.max_food + self.specified_food_levels[:len(food_levels)] = food_levels + self._food_spawned = 0.0 + self.max_agent_level = max_player_level + self.sight = sight + self.force_coop = force_coop + self._game_over = None + + self._rendering_initialized = False + self._valid_actions = None + self._max_cycles = max_cycles + + self._normalize_reward = normalize_reward + self._grid_observation = grid_observation + + self.viewer = None + self.render_mode = render_mode + self.render_style = render_style + + def seed(self, seed=None): + self.np_random, seed = seeding.np_random(seed) + return [seed] + + @functools.lru_cache(maxsize=None) + def observation_space(self, agent): + """The Observation Space for each agent. 
+        - all of the board (board_size^2) with foods
+        - player description (x, y, level)*player_count
+        """
+        if not self._grid_observation:
+            field_x = self.field.shape[1]
+            field_y = self.field.shape[0]
+            # field_size = field_x * field_y
+
+            max_food = self.max_food
+            max_food_level = self.max_agent_level * len(self.possible_agents)
+
+            min_obs_food = [-1, -1, 0]
+            max_obs_food = [field_y-1, field_x-1, max_food_level]
+            min_obs_agents = [-1, -1, 0]
+            max_obs_agents = [field_y-1, field_x-1, self.max_agent_level]
+
+            min_obs = min_obs_food * max_food + min_obs_agents * len(self.possible_agents)
+            max_obs = max_obs_food * max_food + max_obs_agents * len(self.possible_agents)
+        else:
+            # grid observation space
+            grid_shape = (1 + 2 * self.sight, 1 + 2 * self.sight)
+
+            # agents layer: agent levels
+            agents_min = np.zeros(grid_shape, dtype=np.float32)
+            agents_max = np.ones(grid_shape, dtype=np.float32) * self.max_agent_level
+
+            # foods layer: food levels
+            max_food_level = self.max_agent_level * len(self.possible_agents)
+            foods_min = np.zeros(grid_shape, dtype=np.float32)
+            foods_max = np.ones(grid_shape, dtype=np.float32) * max_food_level
+
+            # access layer: is the cell accessible
+            access_min = np.zeros(grid_shape, dtype=np.float32)
+            access_max = np.ones(grid_shape, dtype=np.float32)
+
+            # total layer
+            min_obs = np.stack([agents_min, foods_min, access_min])
+            max_obs = np.stack([agents_max, foods_max, access_max])
+        return gymnasium.spaces.Box(np.array(min_obs), np.array(max_obs), dtype=np.float32)
+
+    @property
+    def observation_spaces(self):
+        return {agent: self.observation_space(agent) for agent in self.possible_agents}
+
+    @functools.lru_cache(maxsize=None)
+    def action_space(self, agent):
+        return gymnasium.spaces.Discrete(6)
+
+    @property
+    def action_spaces(self):
+        return {agent: self.action_space(agent) for agent in self.possible_agents}
+
+    @property
+    def field_size(self):
+        return self.field.shape
+
+    @property
+    def field_length(self):
+        return self.field.size
+
+    @property
+    def rows(self):
+        return self.field_size[0]
+
+    @property
+    def cols(self):
+        return self.field_size[1]
+
+    @property
+    def game_over(self):
+        return self._game_over
+
+    def _gen_valid_moves(self):
+        self._valid_actions = {
+            agent: [
+                action for action in Action if self._is_valid_action(agent, action)
+            ]
+            for agent in self.agents
+        }
+
+    def _action_mask(self, agent):
+        return np.array([
+            1 if Action(i) in self._valid_actions[agent] else 0
+            for i in range(self.action_space(agent).n)
+        ], dtype=np.int8)
+
+    def neighborhood(self, row, col, distance=1, ignore_diag=False):
+        if not ignore_diag:
+            return self.field[
+                max(row - distance, 0) : min(row + distance + 1, self.rows),
+                max(col - distance, 0) : min(col + distance + 1, self.cols),
+            ]
+
+        return (
+            self.field[
+                max(row - distance, 0) : min(row + distance + 1, self.rows), col
+            ].sum()
+            + self.field[
+                row, max(col - distance, 0) : min(col + distance + 1, self.cols)
+            ].sum()
+        )
+
+    def adjacent_food(self, row, col):
+        return (
+            self.field[max(row - 1, 0), col]
+            + self.field[min(row + 1, self.rows - 1), col]
+            + self.field[row, max(col - 1, 0)]
+            + self.field[row, min(col + 1, self.cols - 1)]
+        )
+
+    def adjacent_food_location(self, row, col):
+        if row > 0 and self.field[row - 1, col] > 0:
+            return row - 1, col
+        elif row < self.rows - 1 and self.field[row + 1, col] > 0:
+            return row + 1, col
+        elif col > 0 and self.field[row, col - 1] > 0:
+            return row, col - 1
+        elif col < self.cols - 1 and self.field[row, col + 1] > 0:
+            return row, col + 1
+
+    def adjacent_agents(self, row, col):
+        return [
+            agent
+            for agent in self.agents
+            if abs(self.pos[agent][0] - row) == 1
+            and self.pos[agent][1] == col
+            or abs(self.pos[agent][1] - col) == 1
+            and self.pos[agent][0] == row
+        ]
+
+    def spawn_food(self, max_level):
+        attempts = 0
+        min_level = max_level if self.force_coop else 1
+        for food_level in self.specified_food_levels:
+            while attempts < 1000:
+                attempts += 1
+                row = self.np_random.integers(1, self.rows - 1)
+                col = self.np_random.integers(1, self.cols - 1)
+
+                # check if it has neighbors:
+                if (
+                    self.neighborhood(row, col).sum() > 0
+                    or self.neighborhood(row, col, distance=2, ignore_diag=True) > 0
+                    or not self._is_empty_location(row, col)
+                ):
+                    continue
+
+                self.field[row, col] = (
+                    food_level
+                    if food_level is not None
+                    else self.np_random.integers(min_level, max_level + 1)
+                )
+                break
+        self._food_spawned = self.field.sum()
+
+    def _is_empty_location(self, row, col):
+        if self.field[row, col] != 0:
+            return False
+        for pos in self.pos.values():
+            if pos[0] == row and pos[1] == col:
+                return False
+        return True
+
+    def spawn_agents(self, max_agent_level):
+        possible_indices = np.arange(self.field_length)[self.field.flatten() == 0]
+        num_agents_to_spawn = len(self.agents)
+        spawn_indices = self.np_random.choice(possible_indices,
+                                              size=num_agents_to_spawn,
+                                              replace=False)
+        unraveled_indices = np.unravel_index(spawn_indices, shape=self.field_size)
+        unraveled_indices = list(zip(*unraveled_indices))
+        for i, agent in enumerate(self.agents):
+            self.pos[agent] = unraveled_indices[i]
+            if self.specified_agent_levels[agent] is None:
+                self.agent_levels[agent] = self.np_random.integers(1, max_agent_level + 1)
+            else:
+                self.agent_levels[agent] = min(self.specified_agent_levels[agent], max_agent_level)
+
+    def _is_valid_action(self, agent, action):
+        if action == Action.NONE:
+            return True
+        row_pos, col_pos = self.pos[agent]
+        row_pos_min, col_pos_min = (0, 0)
+        row_pos_max, col_pos_max = (self.rows - 1, self.cols - 1)
+        if action == Action.NORTH:
+            return (
+                row_pos > row_pos_min
+                and self.field[row_pos - 1, col_pos] == 0
+            )
+        if action == Action.SOUTH:
+            return (
+                row_pos < row_pos_max
+                and self.field[row_pos + 1, col_pos] == 0
+            )
+        if action == Action.WEST:
+            return (
+                col_pos > col_pos_min
+                and self.field[row_pos, col_pos - 1] == 0
+            )
+        if action == Action.EAST:
+            return (
+                col_pos < col_pos_max
+                and self.field[row_pos, col_pos + 1] == 0
+            )
+        if action == Action.LOAD:
+            return self.adjacent_food(*self.pos[agent]) > 0
+
+        self.logger.error("Undefined action {} from {}".format(action, agent))
+        raise ValueError("Undefined action")
+
+    def _transform_to_neighborhood(self, center, sight, position):
+        return (
+            position[0] - center[0] + min(sight, center[0]),
+            position[1] - center[1] + min(sight, center[1]),
+        )
+
+    def get_valid_actions(self, agent):
+        # TODO
+        return self._valid_actions[agent]
+
+    def reset(self, seed=None, return_info=False, options=None):
+        if seed is not None:
+            self.seed(seed=seed)
+        self.field = np.zeros(self.field_size, np.int32)
+        self.agents = copy(self.possible_agents)
+        self.spawn_agents(self.max_agent_level)
+        self.spawn_food(
+            max_level=sum(self.agent_levels.values())
+        )
+        self.current_step = 0
+        self._game_over = False
+        self.terminated = False
+        self.truncated = False
+        self._gen_valid_moves()
+
+        observations = {agent: self.observe(agent) for agent in self.agents}
+        infos = {agent: {"action_mask": self._action_mask(agent)}
+                 for agent in self.agents}
+        return observations, infos
+
+    def step(self, actions):
+        self.current_step += 1
+
+        rewards = {agent: 0.0 for agent in self.agents}
+        actions = {agent: (Action(a) if Action(a) in self._valid_actions[agent] else Action.NONE)
+                   for agent, a in actions.items()}
+
+        loading_agents = set()
+        # move agents
+        # if two or more agents try to move to the same location they all fail
+        collisions = defaultdict(list)
+
+        # so check for collisions
+        for agent, action in actions.items():
+            if action == Action.NONE:
+                collisions[tuple(self.pos[agent])].append(agent)
+            elif action == Action.NORTH:
+                collisions[(self.pos[agent][0] - 1, self.pos[agent][1])].append(agent)
+            elif action == Action.SOUTH:
+                collisions[(self.pos[agent][0] + 1, self.pos[agent][1])].append(agent)
+            elif action == Action.WEST:
+                collisions[(self.pos[agent][0], self.pos[agent][1] - 1)].append(agent)
+            elif action == Action.EAST:
+                collisions[(self.pos[agent][0], self.pos[agent][1] + 1)].append(agent)
+            elif action == Action.LOAD:
+                collisions[tuple(self.pos[agent])].append(agent)
+                loading_agents.add(agent)
+
+        # and do movements for non colliding agents
+        for pos, agents in collisions.items():
+            if len(agents) > 1:  # make sure no more than one agent arrives at a location
+                continue
+            self.pos[agents[0]] = pos
+
+        # finally process the loadings:
+        while loading_agents:
+            # find adjacent food
+            agent = loading_agents.pop()
+            frow, fcol = self.adjacent_food_location(*self.pos[agent])
+            food = self.field[frow, fcol]
+
+            adj_agents = self.adjacent_agents(frow, fcol)
+            adj_agents = [
+                a for a in adj_agents if a in loading_agents or a is agent
+            ]
+
+            adj_agent_level = sum([self.agent_levels[a] for a in adj_agents])
+
+            loading_agents = loading_agents - set(adj_agents)
+
+            if adj_agent_level < food:
+                # failed to load
+                for a in adj_agents:
+                    rewards[a] -= self.penalty
+                continue
+
+            # else the food was loaded and each agent scores points
+            for a in adj_agents:
+                rewards[a] = float(self.agent_levels[a] * food)
+                if self._normalize_reward:
+                    rewards[a] = rewards[a] / float(
+                        adj_agent_level * self._food_spawned
+                    )  # normalize reward
+            # and the food is removed
+            self.field[frow, fcol] = 0
+
+        # TODO when pettingzoo distinguishes between 'done' and 'terminated/truncated' will need to update
+        self.terminated = self.field.sum() == 0
+        self.truncated = self._max_cycles <= self.current_step
+        terminated = {agent: self.terminated for agent in self.agents}
+        truncated = {agent: self.truncated for agent in self.agents}
+        self._game_over = self.terminated or self.truncated
+        dones = {agent: self._game_over for agent in self.agents}
+
+        observations = {agent: self.observe(agent) for agent in self.agents}
+
+        self._gen_valid_moves()
+        infos = {agent: {"action_mask": self._action_mask(agent),
+                         "terminated": self.terminated,
+                         "truncated": self.truncated,}
+                 for agent in self.agents}
+        self.agents = [agent for agent in self.agents if not dones[agent]]
+        return observations, rewards, terminated, truncated, infos
+
+    def _get_global_grid_layers(self):
+        grid_shape_x, grid_shape_y = self.field_size
+        grid_shape_x += 2 * self.sight
+        grid_shape_y += 2 * self.sight
+        grid_shape = (grid_shape_x, grid_shape_y)
+
+        # Agents layer: level & position of agents
+        agents_layer = np.zeros(grid_shape, dtype=np.float32)
+        for agent in self.agents:
+            row, col = self.pos[agent]
+            agents_layer[self.sight + row, self.sight + col] = self.agent_levels[agent]
+
+        # Foods layer: level & position of foods
+        foods_layer = np.zeros(grid_shape, dtype=np.float32)
+        foods_layer[self.sight:-self.sight, self.sight:-self.sight] = self.field.copy()
+
+        # Access layer: 1 if grid cells are accessible
+        access_layer = np.ones(grid_shape, dtype=np.float32)
+        # out of bounds not accessible
+        access_layer[:self.sight, :] = 0.0
+        access_layer[-self.sight:, :] = 0.0
+        access_layer[:, :self.sight] = 0.0
+        access_layer[:, -self.sight:] = 0.0
+        # agent locations are not accessible
+        for agent in self.agents:
+            row, col = self.pos[agent]
+            access_layer[self.sight + row, self.sight + col] = 0.0
+        # food locations are not accessible
+        for row, col in zip(*self.field.nonzero()):
+            access_layer[self.sight + row, self.sight + col] = 0.0
+
+        return np.stack([agents_layer, foods_layer, access_layer])
+
+    def _get_grid_obs(self, agent):
+        global_grid_layers = self._get_global_grid_layers()
+        row, col = self.pos[agent]
+        start_row, end_row = row, row + 2 * self.sight + 1
+        start_col, end_col = col, col + 2 * self.sight + 1
+        return global_grid_layers[:, start_row:end_row, start_col:end_col]
+
+    def _get_array_obs(self, agent):
+        obs = np.zeros(self.observation_space(agent).shape, dtype=np.float32)
+        local_field = self.neighborhood(*self.pos[agent], distance=self.sight)
+        obs[:3 * self.max_food] = np.tile([-1, -1, 0], reps=self.max_food)
+        for i, (row, col) in enumerate(zip(*np.nonzero(local_field))):
+            obs[(3 * i):(3 * i + 3)] = [row, col, local_field[row, col]]
+
+        obs[3 * self.max_food:] = np.tile([-1, -1, 0], reps=len(self.possible_agents))
+        # self agent is always first
+        ordered_agents = [agent] + [a for a in self.possible_agents if a != agent]
+        for i, other_agent in enumerate(ordered_agents):
+            relative_pos = self._transform_to_neighborhood(self.pos[agent],
+                                                           self.sight,
+                                                           self.pos[other_agent])
+            if self._in_sight(relative_pos):
+                idx = 3 * self.max_food + 3 * i
+                obs[idx:idx + 3] = [*relative_pos, self.agent_levels[other_agent]]
+        return obs
+
+    def _in_sight(self, relative_pos):
+        lower_bound = np.array([0, 0])
+        upper_bound = np.array([2 * self.sight, 2 * self.sight])
+        rpos = np.array(relative_pos)
+        return np.all((lower_bound <= rpos) & (rpos <= upper_bound))
+
+    def observe(self, agent):
+        if self._grid_observation:
+            obs = self._get_grid_obs(agent)
+        else:
+            obs = self._get_array_obs(agent)
+        assert self.observation_space(agent).contains(obs), \
+            f"obs space error: obs: {obs}, obs_space: {self.observation_space(agent)}"
+        return obs
+
+    def _init_render(self):
+        if self.render_style == "full":
+            from .rendering import Viewer
+            self.viewer = Viewer((self.rows, self.cols))
+        elif self.render_style == "simple":
+            from .simple_render import render
+            self.simple_render = render
+        self._rendering_initialized = True
+
+    def render(self):
+        if not self._rendering_initialized:
+            self._init_render()
+        if self.render_style == "full":
+            return self.viewer.render(self, return_rgb_array=(self.render_mode == "rgb_array"))
+        elif self.render_style == "simple":
+            return self.simple_render(self)
+
+    def close(self):
+        if self.viewer:
+            self.viewer.close()
diff --git a/lbforaging/petting_zoo/icons/agent.png b/lbforaging/petting_zoo/icons/agent.png
new file mode 100644
index 0000000..dcf173f
Binary files /dev/null and b/lbforaging/petting_zoo/icons/agent.png differ
diff --git a/lbforaging/petting_zoo/icons/apple.png b/lbforaging/petting_zoo/icons/apple.png
new file mode 100644
index 0000000..bd27726
Binary files /dev/null and b/lbforaging/petting_zoo/icons/apple.png differ
diff --git a/lbforaging/petting_zoo/rendering.py b/lbforaging/petting_zoo/rendering.py
new file mode 100644
index 0000000..5549172
--- /dev/null
+++ b/lbforaging/petting_zoo/rendering.py
@@ -0,0 +1,246 @@
+"""
+2D rendering of the level based foraging domain
+"""
+
+import math
+import os
+import sys
+
+import numpy as np
+import six
+from gym import error
+
+if "Apple" in sys.version:
+    if "DYLD_FALLBACK_LIBRARY_PATH" in os.environ:
+        os.environ["DYLD_FALLBACK_LIBRARY_PATH"] += ":/usr/lib"
+        # (JDS 2016/04/15): avoid bug on Anaconda 2.3.0 / Yosemite
+
+
+try:
+    import pyglet
+
+    pyglet.options["headless"] = True
+except ImportError as e:
+    raise ImportError(
+        """
+    Cannot import pyglet.
+    HINT: you can install pyglet directly via 'pip install pyglet'.
+    But if you really just want to install all Gym dependencies and not have to think about it,
+    'pip install -e .[all]' or 'pip install gym[all]' will do it.
+    """
+    )
+
+try:
+    from pyglet.gl import *
+except ImportError as e:
+    raise ImportError(
+        """
+    Error occurred while running `from pyglet.gl import *`
+    HINT: make sure you have OpenGL installed. On Ubuntu, you can run 'apt-get install python-opengl'.
+    If you're running on a server, you may need a virtual frame buffer; something like this should work:
+    'xvfb-run -s "-screen 0 1400x900x24" python '
+    """
+    )
+
+
+RAD2DEG = 57.29577951308232
+# # Define some colors
+_BLACK = (0, 0, 0)
+_WHITE = (255, 255, 255)
+_GREEN = (0, 255, 0)
+_RED = (255, 0, 0)
+
+_BACKGROUND_COLOR = _WHITE
+_GRID_COLOR = _BLACK
+
+
+def get_display(spec):
+    """Convert a display specification (such as :0) into an actual Display
+    object.
+    Pyglet only supports multiple Displays on Linux.
+    """
+    if spec is None:
+        return None
+    elif isinstance(spec, six.string_types):
+        return pyglet.canvas.Display(spec)
+    else:
+        raise error.Error(
+            "Invalid display specification: {}. (Must be a string like :0 or None.)".format(
+                spec
+            )
+        )
+
+
+class Viewer(object):
+    def __init__(self, world_size):
+        display = get_display(None)
+        self.rows, self.cols = world_size
+
+        self.grid_size = 50
+        self.icon_size = 20
+
+        self.width = 1 + self.cols * (self.grid_size + 1)
+        self.height = 1 + self.rows * (self.grid_size + 1)
+        self.window = pyglet.window.Window(
+            width=self.width, height=self.height, display=display
+        )
+        self.window.on_close = self.window_closed_by_user
+        self.isopen = True
+
+        glEnable(GL_BLEND)
+        glBlendFunc(GL_SRC_ALPHA, GL_ONE_MINUS_SRC_ALPHA)
+
+        script_dir = os.path.dirname(__file__)
+
+        pyglet.resource.path = [os.path.join(script_dir, "icons")]
+        pyglet.resource.reindex()
+
+        self.img_apple = pyglet.resource.image("apple.png")
+        self.img_agent = pyglet.resource.image("agent.png")
+
+    def close(self):
+        self.window.close()
+
+    def window_closed_by_user(self):
+        self.isopen = False
+        exit()
+
+    def set_bounds(self, left, right, bottom, top):
+        assert right > left and top > bottom
+        scalex = self.width / (right - left)
+        scaley = self.height / (top - bottom)
+        self.transform = Transform(
+            translation=(-left * scalex, -bottom * scaley), scale=(scalex, scaley)
+        )
+
+    def render(self, env, return_rgb_array=False):
+        glClearColor(*_WHITE, 0)
+        self.window.clear()
+        self.window.switch_to()
+        self.window.dispatch_events()
+
+        self._draw_grid()
+        self._draw_food(env)
+        self._draw_players(env)
+
+        if return_rgb_array:
+            buffer = pyglet.image.get_buffer_manager().get_color_buffer()
+            image_data = buffer.get_image_data()
+            arr = np.frombuffer(image_data.get_data(), dtype=np.uint8)
+            arr = arr.reshape(buffer.height, buffer.width, 4)
+            arr = arr[::-1, :, 0:3]
+        self.window.flip()
+        return arr if return_rgb_array else self.isopen
+
+    def _draw_grid(self):
+        batch = pyglet.graphics.Batch()
+        # horizontal lines
+        for r in range(self.rows + 1):
+            batch.add(
+                2,
+                gl.GL_LINES,
+                None,
+                (
+                    "v2f",
+                    (
+                        0,  # LEFT X
+                        (self.grid_size + 1) * r + 1,  # Y
+                        (self.grid_size + 1) * self.cols,  # RIGHT X
+                        (self.grid_size + 1) * r + 1,  # Y
+                    ),
+                ),
+                ("c3B", (*_BLACK, *_BLACK)),
+            )
+
+        # vertical lines
+        for c in range(self.cols + 1):
+            batch.add(
+                2,
+                gl.GL_LINES,
+                None,
+                (
+                    "v2f",
+                    (
+                        (self.grid_size + 1) * c + 1,  # X
+                        0,  # BOTTOM Y
+                        (self.grid_size + 1) * c + 1,  # X
+                        (self.grid_size + 1) * self.rows,  # TOP Y
+                    ),
+                ),
+                ("c3B", (*_BLACK, *_BLACK)),
+            )
+        batch.draw()
+
+    def _draw_food(self, env):
+        idxes = list(zip(*env.field.nonzero()))
+        apples = []
+        batch = pyglet.graphics.Batch()
+
+        # print(env.field)
+        for row, col in idxes:
+            apples.append(
+                pyglet.sprite.Sprite(
+                    self.img_apple,
+                    (self.grid_size + 1) * col,
+                    self.height - (self.grid_size + 1) * (row + 1),
+                    batch=batch,
+                )
+            )
+        for a in apples:
+            a.update(scale=self.grid_size / a.width)
+        batch.draw()
+
+        for row, col in idxes:
+            self._draw_badge(row, col, env.field[row, col])
+
+    def _draw_players(self, env):
+        agents = []
+        batch = pyglet.graphics.Batch()
+
+        for agent in env.agents:
+            row, col = env.pos[agent]
+            sprite = pyglet.sprite.Sprite(
+                self.img_agent,
+                (self.grid_size + 1) * col,
+                self.height - (self.grid_size + 1) * (row + 1),
+                batch=batch,
+            )
+            sprite.color = env.agent_colors[agent]
+            agents.append(sprite)
+        for agent_sprite in agents:
+            agent_sprite.update(scale=self.grid_size / agent_sprite.width)
+        batch.draw()
+        for agent in env.agents:
+            self._draw_badge(*env.pos[agent], env.agent_levels[agent])
+
+    def _draw_badge(self, row, col, level):
+        resolution = 6
+        radius = self.grid_size / 5
+
+        badge_x = col * (self.grid_size + 1) + (3 / 4) * (self.grid_size + 1)
+        badge_y = self.height - (self.grid_size + 1) * (row + 1) + (1 / 4) * (self.grid_size + 1)
+
+        # make a circle
+        verts = []
+        for i in range(resolution):
+            angle = 2 * math.pi * i / resolution
+            x = radius * math.cos(angle) + badge_x
+            y = radius * math.sin(angle) + badge_y
+            verts += [x, y]
+        circle = pyglet.graphics.vertex_list(resolution, ("v2f", verts))
+        glColor3ub(*_WHITE)
+        circle.draw(GL_POLYGON)
+        glColor3ub(*_BLACK)
+        circle.draw(GL_LINE_LOOP)
+        label = pyglet.text.Label(
+            str(level),
+            font_name="Times New Roman",
+            font_size=12,
+            bold=True,
+            x=badge_x,
+            y=badge_y + 2,
+            anchor_x="center",
+            anchor_y="center",
+            color=(*_BLACK, 255),
+        )
+        label.draw()
diff --git a/lbforaging/petting_zoo/simple_render.py b/lbforaging/petting_zoo/simple_render.py
new file mode 100644
index 0000000..88f337a
--- /dev/null
+++ b/lbforaging/petting_zoo/simple_render.py
@@ -0,0 +1,107 @@
+import numpy as np
+from PIL import Image
+
+PIXEL_SCALE = 3
+IMG_SCALE = 24
+
+BASE_COLOUR = (0,0,255)
+CHEQUER_V = 230
+
+FOOD_BASE = (  0,255,255)
+FOOD_RING = (  0,128,255)
+FOOD_LVLS = (  0,255,204)
+
+AGENT_BASE = (140,255,255)
+AGENT_RING = (140,128,255)
+AGENT_LVLS = (140,255,204)
+
+ring_points = (
+    [(r, PIXEL_SCALE-1) for r in range(PIXEL_SCALE-1)]
+    + [(PIXEL_SCALE-1, c) for c in reversed(range(1,PIXEL_SCALE))]
+    + [(r, 0) for r in reversed(range(1,PIXEL_SCALE))]
+    + [(0, c) for c in range(PIXEL_SCALE-1)]
+)
+
+
+def _pixel_to_slice(pixel):
+    return slice(pixel*PIXEL_SCALE, (pixel+1)*PIXEL_SCALE)
+
+
+def _color_to_arr(color):
+    return np.expand_dims(np.array(color, dtype=np.uint8), (1,2))
+
+
+def _food_pixel(lvl):
+    """Builds a food sprite of given level `lvl`."""
+    pixel = np.tile(
+        _color_to_arr(FOOD_BASE),
+        (PIXEL_SCALE, PIXEL_SCALE)
+    )
+    # draw the level indicator ring:
+    pixel[:, 0,:] = _color_to_arr(FOOD_RING).reshape(3,1)
+    pixel[:,-1,:] = _color_to_arr(FOOD_RING).reshape(3,1)
+    pixel[:,:, 0] = _color_to_arr(FOOD_RING).reshape(3,1)
+    pixel[:,:,-1] = _color_to_arr(FOOD_RING).reshape(3,1)
+    ring_start = PIXEL_SCALE//2
+    for l in range(lvl):
+        point = ring_points[(l+ring_start)%len(ring_points)]
+        pixel[(slice(None), point[1], point[0])] = _color_to_arr(FOOD_LVLS).squeeze()
+    return pixel
+
+
+def _agent_pixel(lvl):
+    """Builds an agent sprite of given level `lvl`."""
+    pixel = np.tile(
+        _color_to_arr(AGENT_BASE),
+        (PIXEL_SCALE, PIXEL_SCALE)
+    )
+    # draw the level indicator ring:
+    pixel[:, 0,:] = _color_to_arr(AGENT_RING).reshape(3,1)
+    pixel[:,-1,:] = _color_to_arr(AGENT_RING).reshape(3,1)
+    pixel[:,:, 0] = _color_to_arr(AGENT_RING).reshape(3,1)
+    pixel[:,:,-1] = _color_to_arr(AGENT_RING).reshape(3,1)
+    ring_start = PIXEL_SCALE//2
+    for l in range(lvl):
+        point = ring_points[(l+ring_start)%len(ring_points)]
+        pixel[(slice(None), point[1], point[0])] = _color_to_arr(AGENT_LVLS).squeeze()
+    return pixel
+
+
+def render(env):
+    """Renders the environment."""
+    base_pixel = np.tile(
+        _color_to_arr(BASE_COLOUR),
+        (PIXEL_SCALE, PIXEL_SCALE)
+    )
+    field_size = env.field_size
+    img = np.tile(base_pixel, field_size)
+    # chequer
+    for y in range(field_size[0]):
+        for x in range(field_size[1]):
+            if (x-y)%2 == 0:
+                r = _pixel_to_slice(y)
+                c = _pixel_to_slice(x)
+                img[2,r,c] = CHEQUER_V
+
+    # Food
+    for (y,x) in zip(*np.nonzero(env.field)):
+        r = _pixel_to_slice(y)
+        c = _pixel_to_slice(x)
+        l = env.field[y,x]
+        img[:,r,c] = _food_pixel(l)
+
+    # Agents
+    for agent, pos in env.pos.items():
+        r = _pixel_to_slice(pos[0])
+        c = _pixel_to_slice(pos[1])
+        l = env.agent_levels[agent]
+        img[:,r,c] = _agent_pixel(l)
+
+    rgb_image = Image.fromarray(
+        img.transpose((1,2,0)), mode="HSV"
+    ).convert(
+        "RGB"
+    ).resize(
+        (IMG_SCALE*img.shape[2], IMG_SCALE*img.shape[1]),
+        resample=Image.Resampling.NEAREST,
+    )
+    return np.asarray(rgb_image)
diff --git a/setup.py b/setup.py
index c91d310..0a3d7f7 100644
--- a/setup.py
+++ b/setup.py
@@ -13,8 +13,19 @@
         "Programming Language :: Python :: 3.5",
         "Programming Language :: Python :: 3.6",
         "Programming Language :: Python :: 3.7",
+        "Programming Language :: Python :: 3.8",
+        "Programming Language :: Python :: 3.9",
+        "Programming Language :: Python :: 3.10",
     ],
-    install_requires=["numpy", "gym>=0.12", "pyglet"],
-    extras_require={"test": ["pytest"]},
+    install_requires=[
+        "numpy>=1.23.1",
+        "gymnasium>=0.26.2",
+        "pettingzoo>=1.23.0",
+        "pillow",
+    ],
+    extras_require={
+        "test": ["pytest"],
+        "full_render": ["pyglet<2"],
+    },
     include_package_data=True,
 )
diff --git a/tests/petting_zoo_test.py b/tests/petting_zoo_test.py
new file mode 100644
index 0000000..7d203f2
--- /dev/null
+++ b/tests/petting_zoo_test.py
@@ -0,0 +1,28 @@
+import pytest
+import pettingzoo.test as pzt
+import supersuit as ss
+
+import lbforaging.petting_zoo as lbf
+
+
+def test_max_cycles():
+    pzt.max_cycles_test(lbf)
+
+
+def test_seed():
+    pzt.seed_test(lbf.env)
+
+
+def test_api():
+    env = lbf.env()
+    pzt.api_test(env, num_cycles=1000)
+
+
+def test_parallel_api():
+    p_env = lbf.parallel_env()
+    pzt.parallel_api_test(p_env, num_cycles=1000)
+
+
+def test_vec_env():
+    p_env = lbf.parallel_env()
+    mve = ss.pettingzoo_env_to_vec_env_v1(p_env)
+
+
+def test_concat_vec_env():
+    p_env = lbf.parallel_env()
+    mve = ss.pettingzoo_env_to_vec_env_v1(p_env)
+    cve = ss.concat_vec_envs_v1(mve, 4)
diff --git a/tests/test_env.py b/tests/test_env.py
index e58c0f9..4556327 100644
--- a/tests/test_env.py
+++ b/tests/test_env.py
@@ -1,7 +1,7 @@
 import pytest
 import numpy as np
 
-import lbforaging
-from lbforaging.foraging.environment import Action
+import lbforaging.gym_env
+from lbforaging.gym_env.environment import Action
 import gym
 
@@ -166,4 +166,4 @@ def test_partial_obs_3(simple2p1f):
     obs, _, _, _ = env.step([Action.WEST, Action.NONE])
 
     assert obs[0][-2] > -1
-    assert obs[1][-2] > -1
\ No newline at end of file
+    assert obs[1][-2] > -1
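
Reviewer note: a minimal sketch (not part of the diff) for exercising the new PettingZoo parallel API end to end, assuming the package is installed with these changes. `parallel_env`, the `action_mask` info entry, and the five-tuple `step` return are all defined in `lbforaging/petting_zoo/environment.py` above.

import numpy as np

from lbforaging.petting_zoo import parallel_env

# Constructor arguments mirror the ForagingEnvLite.__init__ defaults.
env = parallel_env(n_players=2, field_size=(8, 8), max_food=3, max_cycles=50)
observations, infos = env.reset(seed=42)

while env.agents:  # step() removes agents once the episode ends
    # Sample uniformly among the valid actions reported via the action mask.
    actions = {
        agent: int(np.random.choice(np.flatnonzero(infos[agent]["action_mask"])))
        for agent in env.agents
    }
    observations, rewards, terminated, truncated, infos = env.step(actions)
env.close()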