-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathenvironment.py
More file actions
112 lines (83 loc) · 3.22 KB
/
environment.py
File metadata and controls
112 lines (83 loc) · 3.22 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import collections
import random
import tensorflow as tf
# Command-line flag plumbing. NOTE(review): tf.app.flags is the TF 1.x API
# (removed in TF 2; absl.flags is the successor) — confirm the target TF
# version before upgrading.
flags = tf.app.flags
FLAGS = flags.FLAGS
# Module-import side effect: reseeds the process-wide RNG so card draws are
# reproducible across runs.
random.seed(42)
# Card colors.
COLOR_RED = "R"
COLOR_BLACK = "B"
# COLOR_CHOOSE lists black twice, so a random draw is twice as likely to
# produce a black card as a red one.
COLORS = [COLOR_RED, COLOR_BLACK]
COLOR_CHOOSE = [COLOR_RED, COLOR_BLACK, COLOR_BLACK]
# Player actions: hit (draw another card) or stick (stop drawing).
ACTION_HIT = "H"
ACTION_STICK = "S"
ACTIONS = [ACTION_HIT, ACTION_STICK]
# NOTE: the original `Card = collections.namedtuple(...)` line was dead code —
# it was immediately shadowed by the class definition below — and was removed.
class Card(object):
    """A playing card with a color (COLOR_RED/COLOR_BLACK) and a face value.

    Black cards contribute their value positively to a hand's sum; red cards
    contribute negatively.
    """

    def __init__(self, color, value):
        self.color = color
        self.value = value

    def get_game_value(self):
        """Returns the signed contribution of this card to a hand's sum."""
        return self.value if self.color == COLOR_BLACK else -self.value

    def __eq__(self, other):
        return self.color == other.color and self.value == other.value

    def __ne__(self, other):
        # Explicit __ne__ so `!=` agrees with __eq__ even under Python 2,
        # where it is not derived automatically. Identical behavior on Py3.
        return not self.__eq__(other)

    def __hash__(self):
        # Hash on the color's index so that hash is consistent with __eq__.
        return hash((COLORS.index(self.color), self.value))

    def __str__(self):
        return "color:%s value:%d" % (self.color, self.value)
class State(object):
    """Game state: the dealer's visible card, the player's sum, terminal flag."""

    def __init__(self, dealer_card, player_sum, is_terminal=False):
        self.dealer_card = dealer_card
        self.player_sum = player_sum
        self.is_terminal = is_terminal

    def _key(self):
        # Single tuple used by both __eq__ and __hash__, keeping them in sync.
        return (self.dealer_card, self.player_sum, self.is_terminal)

    def __eq__(self, other):
        return self._key() == other._key()

    def __hash__(self):
        return hash(self._key())

    def __str__(self):
        return "dealer: %s player: %d terminal: %s" % (
            str(self.dealer_card), self.player_sum, self.is_terminal)
# Shared sentinel returned whenever a game ends. The dealer card and player
# sum here look like arbitrary placeholders — presumably only is_terminal is
# meaningful once a game is over; confirm against consumers of step().
TERMINAL_STATE = State(Card(COLOR_RED, 1), 1, True)
class Environment(object):
    """Card-game environment that resolves player actions into (state, reward).

    Rewards are from the player's perspective: +1.0 win, 0.0 draw/ongoing,
    -1.0 loss.
    """

    def __init__(self):
        pass

    def step(self, state, action):
        """Gets a sample of the next state given a state and an action.

        Args:
          state: A State that represents the state before the action.
          action: One of ACTIONS representing the action taken by the player.

        Returns:
          new_state: The state after the resolution of the action, can be terminal.
          reward: The sum of the rewards encountered while resolving action.
        """
        # Fix: removed an unused `reward = 0.0` local and unified all reward
        # return values to floats (they previously mixed int and float).
        new_card = self._generate_random_card()
        if action == ACTION_HIT:
            new_player_value = state.player_sum + new_card.get_game_value()
            # Player busts (sum outside [1, 21]) and loses immediately.
            if new_player_value < 1 or new_player_value > 21:
                return TERMINAL_STATE, -1.0
            return State(state.dealer_card, new_player_value), 0.0
        # STICK: the dealer draws (starting with new_card) until reaching at
        # least 17 or busting.
        current_dealer_value = state.dealer_card.get_game_value()
        current_dealer_value += new_card.get_game_value()
        while current_dealer_value < 17:
            # Dealer fell below 1 (red cards subtract): dealer busts, player wins.
            if current_dealer_value < 1:
                return TERMINAL_STATE, 1.0
            drawn_card = self._generate_random_card()
            current_dealer_value += drawn_card.get_game_value()
        if current_dealer_value > 21:
            # Dealer went above 21: dealer busts, player wins.
            return TERMINAL_STATE, 1.0
        # Showdown: compare the sticking player's sum against the dealer's.
        if state.player_sum > current_dealer_value:
            return TERMINAL_STATE, 1.0
        if state.player_sum == current_dealer_value:
            return TERMINAL_STATE, 0.0
        return TERMINAL_STATE, -1.0

    def generate_starting_state(self):
        """Returns a fresh non-terminal State; dealer and player each draw one black card."""
        return State(self._generate_random_card(force_black=True),
                     self._generate_random_card(force_black=True).get_game_value())

    @staticmethod
    def _generate_random_card(force_black=False):
        """Draws a card with value 1-10; black is twice as likely as red unless force_black."""
        return Card(COLOR_BLACK if force_black else random.choice(COLOR_CHOOSE),
                    random.randint(1, 10))