main.py
import sys
import signal
import time

# sys.path.append('/home/test/pyRDDLGym')
from pyRDDLGym import RDDLEnv
from pyRDDLGym import ExampleManager
from pyRDDLGym.Policies.Agents import NoOpAgent

###
# import dependencies for agents and environments
import numpy as np
from fibonacci import fibonacci  # install py-fibonacci

# only one of these imports may bind MyRDDLAgent; the NoOpAgent variant is
# kept commented out for reference
# from MyAgent.Agent import NoOpAgent as MyRDDLAgent
from MyAgent.RandomAgent import RandomAgent as MyRDDLAgent
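
# For reference, a minimal sketch of what MyAgent/RandomAgent.py is assumed to
# look like. The module itself is not shown here, so the names and defaults
# below are guesses that simply mirror the NoOpAgent interface used in main():
# a constructor taking action_space and num_actions, and a sample_action()
# method returning a dict of action-fluent values.
#
#     class RandomAgent:
#         def __init__(self, action_space, num_actions=1, seed=None):
#             self.action_space = action_space   # dict space of action fluents
#             self.num_actions = num_actions     # max concurrent actions allowed
#             if seed is not None:
#                 self.action_space.seed(seed)
#
#         def sample_action(self):
#             # draw a random value for every action fluent in the space
#             return self.action_space.sample()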


def signal_handler(signum, frame):
    # raised by the interval timer (SIGALRM) when the time budget runs out
    raise Exception("Timed out!")
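
# The timeout machinery in main() pairs the handler above with
# signal.setitimer: the timer is armed with the remaining time budget before
# each call to the agent, and the agent's thinking time is then subtracted
# from that budget. A minimal self-contained sketch of the same pattern
# (slow_policy is a hypothetical stand-in for any agent call):
#
#     signal.signal(signal.SIGALRM, signal_handler)
#     remaining = 2.0                              # seconds left in the budget
#     signal.setitimer(signal.ITIMER_REAL, remaining)
#     try:
#         t0 = time.time()
#         result = slow_policy()                   # interrupted if it overruns
#         remaining -= time.time() - t0            # charge the time used
#     except Exception:
#         result = None                            # fall back to a default
#     finally:
#         signal.setitimer(signal.ITIMER_REAL, 0)  # disarm the timer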


# MAIN INTERACTION LOOP #
def main(env, inst, method_name=None, episodes=1):
    print(f'preparing to launch instance {inst} of domain {env}...')

    # get the environment info
    EnvInfo = ExampleManager.GetEnvInfo(env)

    # set up the environment class for the requested instance
    # (every example domain ships with at least one instance)
    log = method_name is not None
    myEnv = RDDLEnv.RDDLEnv(domain=EnvInfo.get_domain(),
                            instance=EnvInfo.get_instance(inst),
                            enforce_action_constraints=False,
                            debug=False,
                            log=log,
                            simlogname=method_name)
    budget = myEnv.Budget
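
    # Note: EnvInfo.get_domain() and EnvInfo.get_instance() resolve to paths of
    # the bundled example RDDL files, so the same constructor should accept your
    # own files directly (hypothetical paths shown):
    #
    #     myEnv = RDDLEnv.RDDLEnv(domain='path/to/domain.rddl',
    #                             instance='path/to/instance0.rddl')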
    # default noop agent, do not change
    defaultAgent = NoOpAgent(action_space=myEnv.action_space,
                             num_actions=myEnv.numConcurrentActions)

    ###
    # initialize the agent under test
    agent = MyRDDLAgent(action_space=myEnv.action_space,
                        num_actions=myEnv.numConcurrentActions)
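
    # Any object exposing this constructor signature and a sample_action()
    # method can be swapped in above. For instance, assuming the installed
    # pyRDDLGym version ships its built-in random policy, this would also work:
    #
    #     from pyRDDLGym.Policies.Agents import RandomAgent
    #     agent = RandomAgent(action_space=myEnv.action_space,
    #                         num_actions=myEnv.numConcurrentActions)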

    ### Test a few agents...
    signal.signal(signal.SIGALRM, signal_handler)
    for episode in range(episodes):
        total_reward = 0
        state = myEnv.reset()
        timed_out = False
        elapsed = budget
        finish = start = 0
        for step in range(myEnv.horizon):
            # action selection: arm the timer with the remaining budget, then
            # charge the agent's thinking time against that budget
            if not timed_out:
                signal.setitimer(signal.ITIMER_REAL, elapsed)
                try:
                    start = time.time()
                    ### Call Agent
                    action = agent.sample_action()
                    ### Test a few sample actions ...
                    finish = time.time()
                except Exception:
                    print('Timed out!')
                    print('This episode will continue with default actions!')
                    action = defaultAgent.sample_action()
                    timed_out = True
                    elapsed = 0
                signal.setitimer(signal.ITIMER_REAL, 0)  # disarm before stepping
                if not timed_out:
                    elapsed = elapsed - (finish - start)
            else:
                action = defaultAgent.sample_action()
            next_state, reward, done, info = myEnv.step(action)
            total_reward += reward
            print()
            print(f'step = {step}')
            print(f'state = {state}')
            print(f'action = {action}')
            print(f'next state = {next_state}')
            print(f'reward = {reward}')
            state = next_state
            if done:
                break
        print(f'episode {episode + 1} ended with reward {total_reward} after {budget - elapsed} seconds')
    myEnv.close()

    ########################################
    # CLEAN UP ANY RESOURCES               #
    ########################################


# Command line interface, DO NOT CHANGE
if __name__ == "__main__":
    args = sys.argv
    print(args)
    method_name = None
    episodes = 1
    if len(args) == 2:
        if args[1] == '-h':  # args[0] is the script path; the flag is args[1]
            print('python main.py <domain> <instance> <method name> <num episodes>')
            sys.exit(0)
    if len(args) < 3:
        env, inst = 'HVAC', '1'
    elif len(args) < 4:
        env, inst = args[1:3]
    elif len(args) < 5:
        env, inst, method_name = args[1:4]
    else:
        env, inst, method_name, episodes = args[1:5]
        try:
            episodes = int(episodes)
        except ValueError:
            raise ValueError('episodes must be an integer value argument, received: ' + episodes)
    main(env, inst, method_name, episodes)
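
# Example invocations (matching the usage string printed for -h; the method
# name only labels the simulation log via simlogname):
#
#     python main.py                    # defaults: domain HVAC, instance 1
#     python main.py HVAC 1
#     python main.py HVAC 1 random 3    # log as 'random', run 3 episodes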