-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathenvironment.py
More file actions
112 lines (83 loc) · 3.22 KB
/
environment.py
File metadata and controls
112 lines (83 loc) · 3.22 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
import collections
import random
import tensorflow as tf
# Command-line flag plumbing. NOTE(review): tf.app.flags is the TF 1.x API
# (removed in TF 2; absl.flags is the successor) — confirm the target TF
# version before upgrading.
flags = tf.app.flags
FLAGS = flags.FLAGS
# Module-import side effect: reseeds the process-wide RNG so card draws are
# reproducible across runs.
random.seed(42)
# Card colors.
COLOR_RED = "R"
COLOR_BLACK = "B"
# COLOR_CHOOSE lists black twice, so a random draw is twice as likely to
# produce a black card as a red one.
COLORS = [COLOR_RED, COLOR_BLACK]
COLOR_CHOOSE = [COLOR_RED, COLOR_BLACK, COLOR_BLACK]
# Player actions: hit (draw another card) or stick (stop drawing).
ACTION_HIT = "H"
ACTION_STICK = "S"
ACTIONS = [ACTION_HIT, ACTION_STICK]
# NOTE: the original `Card = collections.namedtuple(...)` line was dead code —
# it was immediately shadowed by the class definition below — and was removed.
class Card(object):
    """A playing card with a color (COLOR_RED/COLOR_BLACK) and a face value.

    Black cards contribute their value positively to a hand's sum; red cards
    contribute negatively.
    """

    def __init__(self, color, value):
        self.color = color
        self.value = value

    def get_game_value(self):
        """Returns the signed contribution of this card to a hand's sum."""
        return self.value if self.color == COLOR_BLACK else -self.value

    def __eq__(self, other):
        return self.color == other.color and self.value == other.value

    def __ne__(self, other):
        # Explicit __ne__ so `!=` agrees with __eq__ even under Python 2,
        # where it is not derived automatically. Identical behavior on Py3.
        return not self.__eq__(other)

    def __hash__(self):
        # Hash on the color's index so that hash is consistent with __eq__.
        return hash((COLORS.index(self.color), self.value))

    def __str__(self):
        return "color:%s value:%d" % (self.color, self.value)
class State(object):
    """Game state: the dealer's visible card, the player's sum, terminal flag."""

    def __init__(self, dealer_card, player_sum, is_terminal=False):
        self.dealer_card = dealer_card
        self.player_sum = player_sum
        self.is_terminal = is_terminal

    def _key(self):
        # Single tuple used by both __eq__ and __hash__, keeping them in sync.
        return (self.dealer_card, self.player_sum, self.is_terminal)

    def __eq__(self, other):
        return self._key() == other._key()

    def __hash__(self):
        return hash(self._key())

    def __str__(self):
        return "dealer: %s player: %d terminal: %s" % (
            str(self.dealer_card), self.player_sum, self.is_terminal)
# Shared sentinel returned whenever a game ends. The dealer card and player
# sum here look like arbitrary placeholders — presumably only is_terminal is
# meaningful once a game is over; confirm against consumers of step().
TERMINAL_STATE = State(Card(COLOR_RED, 1), 1, True)
class Environment(object):
    """Card-game environment that resolves player actions into (state, reward).

    Rewards are from the player's perspective: +1.0 win, 0.0 draw/ongoing,
    -1.0 loss.
    """

    def __init__(self):
        pass

    def step(self, state, action):
        """Gets a sample of the next state given a state and an action.

        Args:
          state: A State that represents the state before the action.
          action: One of ACTIONS representing the action taken by the player.

        Returns:
          new_state: The state after the resolution of the action, can be terminal.
          reward: The sum of the rewards encountered while resolving action.
        """
        # Fix: removed an unused `reward = 0.0` local and unified all reward
        # return values to floats (they previously mixed int and float).
        new_card = self._generate_random_card()
        if action == ACTION_HIT:
            new_player_value = state.player_sum + new_card.get_game_value()
            # Player busts (sum outside [1, 21]) and loses immediately.
            if new_player_value < 1 or new_player_value > 21:
                return TERMINAL_STATE, -1.0
            return State(state.dealer_card, new_player_value), 0.0
        # STICK: the dealer draws (starting with new_card) until reaching at
        # least 17 or busting.
        current_dealer_value = state.dealer_card.get_game_value()
        current_dealer_value += new_card.get_game_value()
        while current_dealer_value < 17:
            # Dealer fell below 1 (red cards subtract): dealer busts, player wins.
            if current_dealer_value < 1:
                return TERMINAL_STATE, 1.0
            drawn_card = self._generate_random_card()
            current_dealer_value += drawn_card.get_game_value()
        if current_dealer_value > 21:
            # Dealer went above 21: dealer busts, player wins.
            return TERMINAL_STATE, 1.0
        # Showdown: compare the sticking player's sum against the dealer's.
        if state.player_sum > current_dealer_value:
            return TERMINAL_STATE, 1.0
        if state.player_sum == current_dealer_value:
            return TERMINAL_STATE, 0.0
        return TERMINAL_STATE, -1.0

    def generate_starting_state(self):
        """Returns a fresh non-terminal State; dealer and player each draw one black card."""
        return State(self._generate_random_card(force_black=True),
                     self._generate_random_card(force_black=True).get_game_value())

    @staticmethod
    def _generate_random_card(force_black=False):
        """Draws a card with value 1-10; black is twice as likely as red unless force_black."""
        return Card(COLOR_BLACK if force_black else random.choice(COLOR_CHOOSE),
                    random.randint(1, 10))