5
5
from random import choice
6
6
7
7
8
class Stat(object):
    """Per-node Monte-Carlo statistics: accumulated value and visit count.

    Replaces the earlier parallel ``wins``/``plays`` dicts with a single
    record keyed by ``(player, state)``.  ``value`` holds the accumulated
    reward (win count in MonteCarlo; summed end values in ValueMonteCarlo —
    inferred from the surrounding diff, confirm against full file) and
    ``visits`` the number of simulations that passed through the node.
    """

    # __slots__ keeps per-instance memory small; the tree can hold
    # many thousands of these records.
    __slots__ = ('value', 'visits')

    def __init__(self, value=0, visits=0):
        self.value = value    # accumulated reward for this (player, state)
        self.visits = visits  # number of simulations through this node

    def __repr__(self):
        # Added for debuggability when inspecting self.stats during tuning.
        return 'Stat(value=%r, visits=%r)' % (self.value, self.visits)
8
16
class MonteCarlo (object ):
9
17
def __init__ (self , board , ** kwargs ):
10
18
self .board = board
11
19
self .history = []
12
- self .wins = {}
13
- self .plays = {}
20
+ self .stats = {}
14
21
15
22
self .max_depth = 0
16
23
self .data = {}
@@ -66,9 +73,9 @@ def get_play(self):
66
73
# Display the stats for each possible play.
67
74
self .data ['moves' ] = sorted (
68
75
({'move' : p ,
69
- 'percent' : 100 * self .wins . get (( player , S ), 0 ) / self .plays . get (( player , S ), 1 ) ,
70
- 'wins' : self .wins . get (( player , S ), 0 ) ,
71
- 'plays' : self .plays . get (( player , S ), 0 ) }
76
+ 'percent' : 100 * self .stats [( player , S )]. value / self .stats [( player , S )]. visits ,
77
+ 'wins' : self .stats [( player , S )]. value ,
78
+ 'plays' : self .stats [( player , S )]. visits }
72
79
for p , S in moves_states ),
73
80
key = lambda x : (x ['percent' ], x ['plays' ]),
74
81
reverse = True
@@ -78,9 +85,8 @@ def get_play(self):
78
85
79
86
# Pick the move with the highest percentage of wins.
80
87
percent_wins , num_moves , move = max (
81
- (self .wins .get ((player , S ), 0 ) /
82
- self .plays .get ((player , S ), 1 ),
83
- self .plays .get ((player , S ), 0 ),
88
+ (self .stats [(player , S )].value / self .stats [(player , S )].visits ,
89
+ self .stats [(player , S )].visits ,
84
90
p )
85
91
for p , S in moves_states
86
92
)
@@ -93,7 +99,7 @@ def run_simulation(self):
93
99
94
100
# A bit of an optimization here, so we have a local
95
101
# variable lookup instead of an attribute access each loop.
96
- plays , wins = self .plays , self . wins
102
+ stats = self .stats
97
103
98
104
visited_states = set ()
99
105
history_copy = self .history [:]
@@ -105,13 +111,13 @@ def run_simulation(self):
105
111
legal = self .board .legal_plays (history_copy )
106
112
moves_states = [(p , self .board .next_state (state , p )) for p in legal ]
107
113
108
- if all (plays . get (( player , S )) for p , S in moves_states ):
114
+ if all (( player , S ) in stats for p , S in moves_states ):
109
115
# If we have stats on all of the legal moves here, use UCB1.
110
116
log_total = log (
111
- sum (plays [(player , S )] for p , S in moves_states ))
117
+ sum (stats [(player , S )]. visits for p , S in moves_states ))
112
118
value , move , state = max (
113
- ((wins [(player , S )] / plays [(player , S )]) +
114
- self .C * sqrt (log_total / plays [(player , S )]), p , S )
119
+ ((stats [(player , S )]. value / stats [(player , S )]. visits ) +
120
+ self .C * sqrt (log_total / stats [(player , S )]. visits ), p , S )
115
121
for p , S in moves_states
116
122
)
117
123
else :
@@ -122,10 +128,9 @@ def run_simulation(self):
122
128
123
129
# `player` here and below refers to the player
124
130
# who moved into that particular state.
125
- if expand and (player , state ) not in plays :
131
+ if expand and (player , state ) not in stats :
126
132
expand = False
127
- plays [(player , state )] = 0
128
- wins [(player , state )] = 0
133
+ stats [(player , state )] = Stat ()
129
134
if t > self .max_depth :
130
135
self .max_depth = t
131
136
@@ -137,20 +142,19 @@ def run_simulation(self):
137
142
break
138
143
139
144
for player , state in visited_states :
140
- if (player , state ) not in plays :
145
+ if (player , state ) not in stats :
141
146
continue
142
- plays [(player , state )] += 1
147
+ S = stats [(player , state )]
148
+ S .visits += 1
143
149
if player == winner :
144
- wins [( player , state )] += 1
150
+ S . value += 1
145
151
146
152
147
153
class ValueMonteCarlo (object ):
148
154
def __init__ (self , board , ** kwargs ):
149
155
self .board = board
150
156
self .history = []
151
-
152
- self .values = {}
153
- self .plays = {}
157
+ self .stats = {}
154
158
155
159
self .max_depth = 0
156
160
self .data = {}
@@ -175,9 +179,6 @@ def get_play(self):
175
179
# Causes the AI to calculate the best move from the
176
180
# current game state and return it.
177
181
178
- self .values .clear ()
179
- self .plays .clear ()
180
-
181
182
self .max_depth = 0
182
183
self .data = {}
183
184
@@ -209,9 +210,9 @@ def get_play(self):
209
210
# Display the stats for each possible play.
210
211
self .data ['moves' ] = sorted (
211
212
({'move' : p ,
212
- 'average' : self .values . get (( player , S ), 0 ) / self .plays . get (( player , S ), 1 ) ,
213
- 'sum' : self .values . get (( player , S ), 0 ) ,
214
- 'plays' : self .plays . get (( player , S ), 0 ) }
213
+ 'average' : self .stats [( player , S )]. value / self .stats [( player , S )]. visits ,
214
+ 'sum' : self .stats [( player , S )]. value ,
215
+ 'plays' : self .stats [( player , S )]. visits }
215
216
for p , S in moves_states ),
216
217
key = lambda x : (x ['average' ], x ['plays' ]),
217
218
reverse = True
@@ -221,9 +222,8 @@ def get_play(self):
221
222
222
223
# Pick the move with the highest average value.
223
224
average , num_moves , move = max (
224
- (self .values .get ((player , S ), 0 ) /
225
- self .plays .get ((player , S ), 1 ),
226
- self .plays .get ((player , S ), 0 ),
225
+ (self .stats [(player , S )].value / self .stats [(player , S )].visits ,
226
+ self .stats [(player , S )].visits ,
227
227
p )
228
228
for p , S in moves_states
229
229
)
@@ -236,7 +236,7 @@ def run_simulation(self):
236
236
237
237
# A bit of an optimization here, so we have a local
238
238
# variable lookup instead of an attribute access each loop.
239
- plays , values = self .plays , self . values
239
+ stats = self .stats
240
240
241
241
visited_states = set ()
242
242
history_copy = self .history [:]
@@ -248,13 +248,13 @@ def run_simulation(self):
248
248
legal = self .board .legal_plays (history_copy )
249
249
moves_states = [(p , self .board .next_state (state , p )) for p in legal ]
250
250
251
- if all (plays . get (( player , S )) for p , S in moves_states ):
251
+ if all (( player , S ) in stats for p , S in moves_states ):
252
252
# If we have stats on all of the legal moves here, use UCB1.
253
253
log_total = log (
254
- sum (plays [(player , S )] for p , S in moves_states ))
254
+ sum (stats [(player , S )]. visits for p , S in moves_states ))
255
255
value , move , state = max (
256
- ((values [(player , S )] / plays [(player , S )]) +
257
- self .C * sqrt (log_total / plays [(player , S )]), p , S )
256
+ ((stats [(player , S )]. value / stats [(player , S )]. visits ) +
257
+ self .C * sqrt (log_total / stats [(player , S )]. visits ), p , S )
258
258
for p , S in moves_states
259
259
)
260
260
else :
@@ -265,10 +265,9 @@ def run_simulation(self):
265
265
266
266
# `player` here and below refers to the player
267
267
# who moved into that particular state.
268
- if expand and (player , state ) not in plays :
268
+ if expand and (player , state ) not in stats :
269
269
expand = False
270
- plays [(player , state )] = 0
271
- values [(player , state )] = 0
270
+ stats [(player , state )] = Stat ()
272
271
if t > self .max_depth :
273
272
self .max_depth = t
274
273
@@ -281,11 +280,12 @@ def run_simulation(self):
281
280
282
281
player_values = {}
283
282
for player , state in visited_states :
284
- if (player , state ) not in plays :
283
+ if (player , state ) not in stats :
285
284
continue
286
285
if player not in player_values :
287
286
player_values [player ] = self .board .end_value (history_copy , player )
288
287
289
- plays [(player , state )] += 1
288
+ S = stats [(player , state )]
289
+ S .visits += 1
290
290
if player_values [player ] is not None :
291
- values [( player , state )] += player_values [player ]
291
+ S . value += player_values [player ]
0 commit comments