-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgardner.py
executable file
·108 lines (92 loc) · 4.45 KB
/
gardner.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
# -*- coding: utf-8 -*-
"""
@author: Hugo Gilbert and Bruno Zanuttini
"""
from mdp import *
import random
# ===============================================================================================
# Two classes for representing the standard and the sequential versions of Gardner's dice as
# MDPs. Nothing here is specific to the SSB-Q-Learning algorithm whatsoever.
# ===============================================================================================
# Non-sequential version of Gardner's dice
# There are 7 states: "s0", from which actions "throw(A)", "throw(B)", and "throw(C)" are available,
# and "rolled1",...,"rolled6" with wealthLevels "w1",...,"w6", resp., where "wI" is preferred to "wJ" if and only if
# I > J holds (always with the same magnitude; ssbFunction returns +10/-10). All states but "s0" are final, and in
# these final states the only available action is the deterministic "reinit", which leads back to "s0".
class GardnerDiceMDP (MDP):
    """Non-sequential version of Gardner's dice, expressed as an MDP.

    From the initial state "s0" one of the dice A, B or C is thrown, which
    leads to one of the final states "rolled1" ... "rolled6". The final state
    "rolledI" carries the wealth level "wI". From every state other than "s0"
    the only allowed action is "reinit", which leads back to "s0".
    """

    def __init__(self):
        """Build the state/action/wealth-level sets and initialise the MDP base class."""
        # `range` instead of the Python-2-only `xrange`: it behaves the same
        # here on Python 2 and also exists on Python 3.
        faces = range(1, 7)
        states = ["s0"] + ["rolled" + str(i) for i in faces]
        actions = ["reinit", "throw(A)", "throw(B)", "throw(C)"]
        wealthLevels = ["w" + str(i) for i in faces]
        finalStates = ["rolled" + str(i) for i in faces]
        # Reference distribution over wealth levels; it is only stored here.
        # NOTE(review): presumably the wealth distribution induced by the
        # optimal mixed policy -- confirm against the code that reads it.
        self.real_nash_equilibrium = {
            "w1": 1./26, "w2": 7./26, "w3": 5./26,
            "w4": 5./26, "w5": 7./26, "w6": 1./26,
        }
        MDP.__init__(self, states, actions, wealthLevels,
                     self.allowedActionsFunction, finalStates,
                     self.wealthFunction, self.transitionFunction,
                     self.ssbFunction, "s0", "gardner")

    def allowedActionsFunction(self, state):
        """Return the list of actions available in `state`."""
        if state != "s0":
            return ["reinit"]
        return ["throw(A)", "throw(B)", "throw(C)"]

    def wealthFunction(self, finalState):
        """Return the wealth level "wI" attached to the final state "rolledI"."""
        # Strip the 6-character prefix "rolled" to keep only the face value.
        return "w" + finalState[len("rolled"):]

    def transitionFunction(self, state, action):
        """Return a dict mapping each successor state to its probability.

        Returns None when `action` is not one of the actions handled for
        `state`.
        """
        if state == "s0":
            if action == "throw(A)":
                return {"rolled1": 1/6., "rolled4": 5/6.}
            if action == "throw(B)":
                return {"rolled3": 5/6., "rolled6": 1/6.}
            if action == "throw(C)":
                return {"rolled2": 1/2., "rolled5": 1/2.}
        elif action == "reinit":
            return {"s0": 1.}
        # Unsupported (state, action) pair: same result as the former implicit
        # fall-through.
        return None

    def ssbFunction(self, wealthLevel, otherWealthLevel):
        """Compare two wealth levels by their numeric suffix.

        Returns +10 if `wealthLevel` has the larger index, -10 if
        `otherWealthLevel` has the larger index, and 0 if they are equal.
        """
        asInt = int(wealthLevel[1:])
        otherAsInt = int(otherWealthLevel[1:])
        if asInt > otherAsInt:
            return +10
        if otherAsInt > asInt:
            return -10
        return 0

    def __str__(self):
        """Return the human-readable name of the problem."""
        return "Gardner's dice"
# Sequential version of Gardner's dice
# There are 8 states: "s0", from which actions "throw(A)" and "not-throw(A)" are available, with "not-throw(A)" leading
# deterministically to "sBC", from which "throw(B)" and "throw(C)" are available. The rest is similar to the
# non-sequential version of the problem.
class SequentialGardnerDiceMDP (MDP):
    """Sequential version of Gardner's dice, expressed as an MDP.

    In the initial state "s0" the choice is between "throw(A)" and
    "not-throw(A)"; the latter leads deterministically to "sBC", where
    "throw(B)" and "throw(C)" are available. Throws lead to one of the final
    states "rolled1" ... "rolled6", where "rolledI" carries the wealth level
    "wI" and the only allowed action is "reinit", which leads back to "s0".
    """

    def __init__(self):
        """Build the state/action/wealth-level sets and initialise the MDP base class."""
        # `range` instead of the Python-2-only `xrange`: it behaves the same
        # here on Python 2 and also exists on Python 3.
        faces = range(1, 7)
        states = ["s0"] + ["sBC"] + ["rolled" + str(i) for i in faces]
        actions = ["reinit", "throw(A)", "not-throw(A)", "throw(B)", "throw(C)"]
        wealthLevels = ["w" + str(i) for i in faces]
        finalStates = ["rolled" + str(i) for i in faces]
        # Reference distribution over wealth levels; it is only stored here.
        # NOTE(review): presumably the wealth distribution induced by the
        # optimal mixed policy -- confirm against the code that reads it.
        self.real_nash_equilibrium = {
            "w1": 1./26, "w2": 7./26, "w3": 5./26,
            "w4": 5./26, "w5": 7./26, "w6": 1./26,
        }
        MDP.__init__(self, states, actions, wealthLevels,
                     self.allowedActionsFunction, finalStates,
                     self.wealthFunction, self.transitionFunction,
                     self.ssbFunction, "s0", "sequentialGardner")

    def allowedActionsFunction(self, state):
        """Return the list of actions available in `state`."""
        if state == "s0":
            return ["throw(A)", "not-throw(A)"]
        if state == "sBC":
            return ["throw(B)", "throw(C)"]
        # Every remaining state only allows going back to the start.
        return ["reinit"]

    def wealthFunction(self, finalState):
        """Return the wealth level "wI" attached to the final state "rolledI"."""
        # Strip the 6-character prefix "rolled" to keep only the face value.
        return "w" + finalState[len("rolled"):]

    def transitionFunction(self, state, action):
        """Return a dict mapping each successor state to its probability.

        Returns None when `action` is not one of the actions handled for
        `state`.
        """
        if state == "s0":
            if action == "throw(A)":
                return {"rolled1": 1/6., "rolled4": 5/6.}
            if action == "not-throw(A)":
                return {"sBC": 1.}
        elif state == "sBC":
            if action == "throw(B)":
                return {"rolled3": 5/6., "rolled6": 1/6.}
            if action == "throw(C)":
                return {"rolled2": 1/2., "rolled5": 1/2.}
        elif action == "reinit":
            return {"s0": 1.}
        # Unsupported (state, action) pair: same result as the former implicit
        # fall-through.
        return None

    def ssbFunction(self, wealthLevel, otherWealthLevel):
        """Compare two wealth levels by their numeric suffix.

        Returns +10 if `wealthLevel` has the larger index, -10 if
        `otherWealthLevel` has the larger index, and 0 if they are equal.
        """
        asInt = int(wealthLevel[1:])
        otherAsInt = int(otherWealthLevel[1:])
        if asInt > otherAsInt:
            return +10
        if otherAsInt > asInt:
            return -10
        return 0

    def __str__(self):
        """Return the human-readable name of the problem."""
        return "Sequential Gardner's dice"