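"""Train PPO or TRPO with optional RE3 intrinsic rewards on a Gymnasium
environment.

Rollouts are collected from parallel subprocess environments; the trained
model is saved under <path>/<subdir>/models/ and TensorBoard logs under
<path>/<subdir>/logs/.
"""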
# Reference hyperparameters (rl-baselines3-zoo style) for the target
# environments, kept for comparison:
#
# BipedalWalker-v3:
#   normalize: true
#   n_envs: 32
#   n_timesteps: !!float 5e6
#   policy: 'MlpPolicy'
#   n_steps: 2048
#   batch_size: 64
#   gae_lambda: 0.95
#   gamma: 0.999
#   n_epochs: 10
#   ent_coef: 0.0
#   learning_rate: !!float 3e-4
#   clip_range: 0.18
#
# BipedalWalkerHardcore-v3:
#   normalize: true
#   n_envs: 16
#   n_timesteps: !!float 10e7
#   policy: 'MlpPolicy'
#   n_steps: 2048
#   batch_size: 64
#   gae_lambda: 0.95
#   gamma: 0.99
#   n_epochs: 10
#   ent_coef: 0.001
#   learning_rate: lin_2.5e-4
#   clip_range: lin_0.2
#
# Example invocation:
#   python main.py --use_ppo --path tmp/tmp --log_name sigma_0.01_theta_1 \
#       --sigma 0.1 --theta 1 --n_steps 2024
import argparse
import os
from multiprocessing import freeze_support

import gymnasium as gym
from sb3_contrib import TRPO
from stable_baselines3 import PPO
from stable_baselines3.common.monitor import Monitor
from stable_baselines3.common.vec_env import SubprocVecEnv

from customCallBack import CustomCallback
from re3 import RandomEncoder
from wrapper import Wrapper
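

# Command-line interface. --path and --log_name are required; exactly one of
# --use_ppo / --use_trpo selects the algorithm.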
def def_args():
    parser = argparse.ArgumentParser(description='AI Project')
    parser.add_argument('--re3_k', type=int, default=0, help='The k value for RE3; if 0, RE3 will not be used')
    parser.add_argument('--it', type=int, default=300, help='Number of training iterations (rollout/update cycles)')
    parser.add_argument('--n_cores', type=int, default=32, help='Number of cores/parallel environments')
    parser.add_argument('--n_steps', type=int, default=int(2024 / (32 / 8)), help='Number of steps per core per rollout')
    parser.add_argument('--env_name', type=str, default='BipedalWalker-v3', help='Name of the environment to train on')
    parser.add_argument('--learning_rate', type=float, default=0.0003, help='Learning rate for the model')
    parser.add_argument('--n_epochs', type=int, default=3, help='Number of optimisation epochs per update (PPO only)')
    parser.add_argument('--subdir', type=str, default='', help='Subdirectory to save the model and logs in')
    # argparse treats any non-empty string given to type=bool as True, so
    # plain store_true flags are used instead of '--use_ppo True'.
    parser.add_argument('--use_ppo', action='store_true', help='Use PPO')
    parser.add_argument('--use_trpo', action='store_true', help='Use TRPO')
    parser.add_argument('--theta', type=float, default=0, help='Theta for the OU action noise')
    parser.add_argument('--sigma', type=float, default=0, help='Sigma for the OU action noise; if 0, Gaussian noise is used')
    # --path and --log_name have no defaults; argparse terminates with an
    # error if they are missing.
    required_named = parser.add_argument_group('required named arguments')
    required_named.add_argument('--path', type=str, required=True, help='Base directory to save the model and logs under')
    required_named.add_argument('--log_name', type=str, required=True, help='Name to save the model and log under')
    return parser.parse_args()
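

# SubprocVecEnv starts worker processes, so all training code must sit
# behind the __main__ guard.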
if __name__ == '__main__':
    args = def_args()
    if args.use_ppo and args.use_trpo:
        raise ValueError("Can't use both PPO and TRPO")
    if not args.use_ppo and not args.use_trpo:
        raise ValueError("Must pass either --use_ppo or --use_trpo")
    algo = 'PPO' if args.use_ppo else 'TRPO'
    print(f"Using {algo}")
    # Output directories for saved models and TensorBoard logs.
    folder = os.path.join(args.path, args.subdir)
    model_dir = os.path.join(folder, 'models')
    log_dir = os.path.join(folder, 'logs')
    os.makedirs(model_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    # Probe the environment once for its observation dimensionality.
    env_name = args.env_name
    env = gym.make(env_name)
    obs_dim = env.observation_space.shape[0]
    env.close()

    if args.sigma != 0:
        print(f"Using OU noise with sigma {args.sigma} and theta {args.theta}")
    else:
        print("Using Gaussian noise")
    if args.re3_k:
        print("Using RE3")
    # Random encoder that embeds observations for RE3 intrinsic rewards.
    re3_module = RandomEncoder(obs_dim, 128)
    model_name = args.log_name
    print(f"Saving model as {model_name}")
    # One iteration is one rollout of n_steps per environment, so
    # total timesteps = it * n_cores * n_steps.
    iterations = args.it
    n_env = args.n_cores
    n_steps_per_core = args.n_steps
    n_steps_per_update = n_env * n_steps_per_core
    total_timesteps = iterations * n_steps_per_update
    print(f"Training for {iterations} iterations with {n_steps_per_update} steps per update "
          f"for a total of {total_timesteps} timesteps")
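    # Each worker process builds its own Monitor-wrapped, RE3-wrapped
    # environment.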
    freeze_support()
    env_fns = [lambda: Monitor(Wrapper(gym.make(env_name), re3_module)) for _ in range(n_env)]
    env = SubprocVecEnv(env_fns)
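    # theta/sigma appear to be OU-noise parameters added by this project's
    # modified PPO/TRPO; stock stable-baselines3 does not accept them.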
    if algo == 'PPO':
        model = PPO('MlpPolicy', env, verbose=1, tensorboard_log=log_dir,
                    n_steps=n_steps_per_core, batch_size=n_env,
                    n_epochs=args.n_epochs, learning_rate=args.learning_rate,
                    theta=args.theta, sigma=args.sigma)
    else:
        model = TRPO('MlpPolicy', env, verbose=1, tensorboard_log=log_dir,
                     n_steps=n_steps_per_core, batch_size=n_env,
                     learning_rate=args.learning_rate,
                     theta=args.theta, sigma=args.sigma)
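    # The callback adds RE3 intrinsic rewards during training when re3_k > 0.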
    callback = CustomCallback(re3=re3_module, k=args.re3_k)
    model.learn(total_timesteps=total_timesteps, callback=callback, progress_bar=True)
    model.save(os.path.join(model_dir, model_name))
    env.close()
    # Rename the newest TensorBoard run directory to match the model name.
    runs = sorted(os.listdir(log_dir), key=lambda run: os.path.getmtime(os.path.join(log_dir, run)))
    newest_run = runs[-1]
    print(newest_run)
    os.rename(os.path.join(log_dir, newest_run), os.path.join(log_dir, model_name))