models.py
from copy import deepcopy

import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

from util import to_numpy

if torch.cuda.is_available():
    FloatTensor = torch.cuda.FloatTensor
else:
    FloatTensor = torch.FloatTensor


class RLNN(nn.Module):

    def __init__(self, state_dim, action_dim, max_action):
        super(RLNN, self).__init__()

        self.state_dim = state_dim
        self.action_dim = action_dim
        self.max_action = max_action

    def set_params(self, params):
        """
        Sets the parameters of the network from a flat numpy vector
        """
        cpt = 0
        for param in self.parameters():
            tmp = int(np.prod(param.size()))
            if torch.cuda.is_available():
                param.data.copy_(torch.from_numpy(
                    params[cpt:cpt + tmp]).view(param.size()).cuda())
            else:
                param.data.copy_(torch.from_numpy(
                    params[cpt:cpt + tmp]).view(param.size()))
            cpt += tmp

    def get_params(self):
        """
        Returns the parameters of the network as a flat numpy vector
        """
        return deepcopy(np.hstack([to_numpy(v).flatten() for v in
                                   self.parameters()]))

    def get_grads(self):
        """
        Returns the current gradients as a flat numpy vector
        (assumes backward() has been called so that .grad is populated)
        """
        return deepcopy(np.hstack([to_numpy(v.grad).flatten() for v in
                                   self.parameters()]))

    def get_size(self):
        """
        Returns the number of parameters of the network
        """
        return self.get_params().shape[0]

    def load_model(self, filename, net_name):
        """
        Loads the model from '{filename}/{net_name}.pkl'
        """
        if filename is None:
            return

        self.load_state_dict(
            torch.load('{}/{}.pkl'.format(filename, net_name),
                       map_location=lambda storage, loc: storage)
        )

    def save_model(self, output, net_name):
        """
        Saves the model to '{output}/{net_name}.pkl'
        """
        torch.save(
            self.state_dict(),
            '{}/{}.pkl'.format(output, net_name)
        )
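

# Illustrative usage of the save/load helpers above (not part of the original
# module); 'results' and 'actor' are placeholder directory / network names:
#
#     actor.save_model('results', 'actor')   # writes results/actor.pkl
#     actor.load_model('results', 'actor')   # restores the weights in place
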
class Actor(RLNN):

    def __init__(self, state_dim, action_dim, max_action, layer_norm=False, init=True):
        super(Actor, self).__init__(state_dim, action_dim, max_action)

        self.l1 = nn.Linear(state_dim, 400)
        self.l2 = nn.Linear(400, 300)
        self.l3 = nn.Linear(300, action_dim)
        self.layer_norm = layer_norm  # not used yet
        self.optimizer = torch.optim.Adam(self.parameters(), lr=0.01)

    def forward(self, x):
        x = torch.tanh(self.l1(x))
        x = torch.tanh(self.l2(x))
        # Squash the output into [-max_action, max_action]
        x = self.max_action * torch.tanh(self.l3(x))
        return x

class Critic(RLNN):

    def __init__(self, state_dim, action_dim, layer_norm=False):
        super(Critic, self).__init__(state_dim, action_dim, 1)

        self.l1 = nn.Linear(state_dim + action_dim, 400)
        self.l2 = nn.Linear(400, 300)
        self.l3 = nn.Linear(300, 1)
        self.layer_norm = layer_norm  # not used yet

    def forward(self, x, u):
        # State and action are concatenated before the first layer
        x = F.relu(self.l1(torch.cat([x, u], 1)))  # alternative: F.leaky_relu
        x = F.relu(self.l2(x))
        x = self.l3(x)
        return x
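

# Minimal usage sketch (illustrative, not part of the original module): the
# dimensions below are placeholders, and util.to_numpy is assumed to convert a
# tensor to a numpy array as the imports above suggest. The flat vector from
# get_params() can be modified externally (e.g., by an evolutionary search)
# and written back with set_params().
if __name__ == "__main__":
    state_dim, action_dim, max_action = 17, 6, 1.0  # placeholder sizes

    actor = Actor(state_dim, action_dim, max_action)
    critic = Critic(state_dim, action_dim)

    # Round-trip the actor parameters through the flat-vector interface.
    flat = actor.get_params()
    assert flat.shape[0] == actor.get_size()
    actor.set_params(flat)

    # Forward passes on a dummy batch of 4 states.
    state = torch.randn(4, state_dim)
    action = actor(state)
    q_value = critic(state, action)
    print(action.shape, q_value.shape)  # torch.Size([4, 6]) torch.Size([4, 1])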