reg_example.py
# Ryan Turner ([email protected])
import numpy as np
from scipy.special import logsumexp  # lives in scipy.misc in older SciPy
import theano
import theano.tensor as T

import hypernet_trainer as ht
import ign.ign as ign
from ign.t_util import make_shared_dict, make_unshared_dict
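
# Toy 1-D regression with a Bayesian hypernetwork: an IGN (see ign.ign) maps a
# noise vector z to the flat weight vector theta of a small ReLU MLP, and
# hypernet_trainer.build_trainer assembles the training and evaluation
# functions from the log-likelihood, log-prior, and the hypernet's
# log-Jacobian.

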
def dm_example(N):
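    """Generate the toy 1-D regression data used in this example:
    y = x + 0.3*sin(2*pi*(x + eps)) + 0.3*sin(4*pi*(x + eps)) + eps,
    with x ~ U[0, 0.5) and noise eps of standard deviation sqrt(0.02)."""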
    x = 0.5 * np.random.rand(N, 1)
    std_dev = np.sqrt(0.02)
    noise = std_dev * np.random.randn(N, 1)
    x_n = x + noise
    y = x + 0.3 * np.sin(2.0 * np.pi * x_n) + 0.3 * np.sin(4.0 * np.pi * x_n) \
        + noise
    return x, y


def unpack(v, weight_shapes):
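    """Split a flat parameter vector v into a list of tensors with the given
    weight_shapes, in order."""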
    L = []
    tt = 0
    for ws in weight_shapes:
        num_param = np.prod(ws)
        L.append(v[tt:tt + num_param].reshape(ws))
        tt += num_param
    return L


def loglik_primary_f(X, y, theta, weight_shapes):
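    """Per-example log-likelihood of the primary network: a one-hidden-layer
    ReLU MLP with Gaussian observation noise (std fixed to 1, constants
    dropped)."""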
    W0, b0, W1, b1 = unpack(theta, weight_shapes)
    a1 = T.maximum(0.0, T.dot(X, W0) + b0[None, :])
    yp = T.dot(a1, W1) + b1[None, :]

    err = yp - y
    # Assuming std=1 here
    loglik = -0.5 * T.sum(err ** 2, axis=1)  # Ignoring normalizing constant
    return loglik


def logprior_f(theta):
    # Standard Gauss
    logprior = -0.5 * T.sum(theta ** 2)  # Ignoring normalizing constant
    return logprior


def simple_test(X, y, X_valid, y_valid,
                n_epochs, n_batch, init_lr, weight_shapes,
                n_layers=5, vis_freq=100, n_samples=100):
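    """Train a hypernet on (X, y) and evaluate it on (X_valid, y_valid).

    Returns the learned hypernet parameters phi, the per-epoch training cost,
    and the per-epoch Monte Carlo estimate of the validation log-likelihood."""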
    N, D = X.shape
    N_valid = X_valid.shape[0]
    assert y.shape == (N, 1)  # Univariate for now
    assert X_valid.shape == (N_valid, D) and y_valid.shape == (N_valid, 1)

    num_params = sum(np.prod(ws) for ws in weight_shapes)

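    # The hypernet is an IGN with n_layers layers: it maps a noise vector z of
    # length num_params to the primary net's flat weight vector theta, and
    # network_T_and_J also returns the log-Jacobian terms whose sum is passed
    # to build_trainer as log_det_dtheta_dz_f.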
    layers = ign.init_ign(n_layers, num_params)
    phi_shared = make_shared_dict(layers, '%d%s')

    ll_primary_f = lambda X, y, w: loglik_primary_f(X, y, w, weight_shapes)
    hypernet_f = lambda z: ign.network_T_and_J(z[None, :], phi_shared)[0][0, :]
    # TODO verify this length of size 1
    log_det_dtheta_dz_f = lambda z: T.sum(ign.network_T_and_J(z[None, :], phi_shared)[1])
    R = ht.build_trainer(list(phi_shared.values()), N, ll_primary_f,
                         logprior_f, hypernet_f,
                         log_det_dtheta_dz_f=log_det_dtheta_dz_f)
    trainer, get_err, test_loglik = R

    batch_order = np.arange(N // n_batch)

    cost_hist = np.zeros(n_epochs)
    loglik_valid = np.zeros(n_epochs)
    for epoch in range(n_epochs):
        np.random.shuffle(batch_order)

        cost = 0.0
        current_lr = init_lr
        for ii in batch_order:
            x_batch = X[ii * n_batch:(ii + 1) * n_batch]
            y_batch = y[ii * n_batch:(ii + 1) * n_batch]
            z_noise = np.random.randn(num_params)
            batch_cost = trainer(x_batch, y_batch, z_noise, current_lr)
            cost += batch_cost
        cost /= len(batch_order)
        print(cost)
        cost_hist[epoch] = cost

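        # Monte Carlo estimate of the validation log-likelihood: draw n_samples
        # weight samples from the hypernet and average the per-point likelihoods
        # in log space, log (1/S) * sum_s p(y | x, theta_s), via logsumexp.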
        loglik_valid_s = np.zeros((N_valid, n_samples))
        for ss in range(n_samples):
            z_noise = np.random.randn(num_params)
            loglik_valid_s[:, ss] = test_loglik(X_valid, y_valid, z_noise)
        loglik_valid[epoch] = np.mean(
            logsumexp(loglik_valid_s - np.log(n_samples), axis=1))
        print('valid %f' % loglik_valid[epoch])

    phi = make_unshared_dict(phi_shared)
    return phi, cost_hist, loglik_valid


if __name__ == '__main__':
    np.random.seed(5645)

    init_lr = 0.001
    n_epochs = 500
    n_batch = 32
    N = 1000

    primary_layers = 1
    input_dim = 1
    hidden_dim = 10
    output_dim = 1
    weight_shapes = [(input_dim, hidden_dim), (hidden_dim,),
                     (hidden_dim, output_dim), (output_dim,)]

    X, y = dm_example(N)
    X_valid, y_valid = dm_example(N)
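
    # Training prints the per-epoch cost and the Monte Carlo validation
    # log-likelihood; phi holds the learned hypernet parameters.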
    phi, cost_hist, loglik_valid = \
        simple_test(X, y, X_valid, y_valid,
                    n_epochs, n_batch, init_lr, weight_shapes,
                    n_layers=2)