rnn.py
import lasagne
from lasagne import init
from lasagne.layers import DenseLayer, NonlinearityLayer, DropoutLayer
from ..utils.format import check_list
from ..utils.layers import clip_grads, add, mul
from ..memory.gate import GateLayer
# Vanilla RNN cell
def RNNCell(prev_state,
            input_or_inputs=tuple(),
            nonlinearity=lasagne.nonlinearities.tanh,
            num_units=None,
            name=None,
            grad_clipping=0,
            Whid=init.Uniform(),
            Winp=init.Uniform(),
            b=init.Constant(),
            ):
    """
    Implements a one-step recurrent neural network (RNN) with an arbitrary number of units.

    :param prev_state: input that denotes the previous state (shape must be (None, n_units))
    :param input_or_inputs: a single layer or a list/tuple of layers that go as inputs
    :param nonlinearity: which nonlinearity to use
    :param num_units: how many recurrent cells to use. None means "as in prev_state"
    :param grad_clipping: maximum gradient absolute value. 0 or None means "no clipping"

    :returns: updated memory layer
    :rtype: lasagne.layers.Layer

    for developers:
        Works by stacking DenseLayers with an ElemwiseSumLayer;
        this is a function that imitates a layer class, not an actual class.
    """
    assert len(prev_state.output_shape) == 2
    # if needed, infer num_units
    if num_units is None:
        num_units = prev_state.output_shape[1]
    # else check it
    assert num_units == prev_state.output_shape[1]

    inputs = check_list(input_or_inputs)

    if grad_clipping:
        prev_state = clip_grads(prev_state, grad_clipping)
        inputs = [clip_grads(lyr, grad_clipping) for lyr in inputs]

    # from prev state to current state (with bias)
    hid_to_hid = DenseLayer(prev_state,
                            num_units=num_units,
                            nonlinearity=None,
                            name=(name or "") + ".hid_to_hid",
                            W=Whid,
                            b=b)

    Winp = check_list(Winp)
    if len(Winp) == 1:
        Winp *= len(inputs)

    # from inputs to current state (without bias)
    inputs_to_hid = [DenseLayer(input_layer,
                                num_units=num_units,
                                nonlinearity=None,
                                W=Winp[i],
                                b=None,  # this disables additional bias layers
                                name=(name or "") + ".input%i_to_hid" % (i))
                     for i, input_layer in enumerate(inputs)]

    # stack them
    elwise_sum = add(*([hid_to_hid] + inputs_to_hid), name=(name or "") + ".sum")

    # finally, apply nonlinearity
    new_hid = NonlinearityLayer(elwise_sum,
                                nonlinearity,
                                name=(name or "") + ".new_state")

    return new_hid
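
# Usage sketch for RNNCell: a minimal one-step wiring with a 64-unit state and a
# 10-dimensional observation. The layer names and shapes here are illustrative
# assumptions for the example only, not part of the library API.
def _rnn_cell_usage_sketch():
    from lasagne.layers import InputLayer
    prev_state = InputLayer((None, 64), name="prev_state")    # h_{t-1}
    observation = InputLayer((None, 10), name="observation")  # x_t
    # one recurrent step: new_state = tanh(W_hid h_{t-1} + W_inp x_t + b)
    new_state = RNNCell(prev_state, observation, name="rnn")
    return new_state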
def GRUCell(prev_state,
            input_or_inputs=tuple(),
            num_units=None,
            weight_init=init.Normal(),
            bias_init=init.Constant(),
            forgetgate_nonlinearity=lasagne.nonlinearities.sigmoid,
            updategate_nonlinearity=lasagne.nonlinearities.sigmoid,
            hidden_update_nonlinearity=lasagne.nonlinearities.tanh,
            dropout=0,
            name="YetAnotherGRULayer",
            grad_clipping=0,
            ):
    """
    Implements a one-step gated recurrent unit (GRU) with an arbitrary number of units.

    :param prev_state: input that denotes the previous state (shape must be (None, n_units))
    :type prev_state: lasagne.layers.Layer
    :param input_or_inputs: a single layer or a list/tuple of layers that go as inputs
    :type input_or_inputs: lasagne.layers.Layer or list of such
    :param num_units: how many recurrent cells to use. None means "as in prev_state"
    :type num_units: int

    :param weight_init: either a lasagne initializer to use for all gate weights,
        or a list of two initializers:
        - first used for all weights from hidden -> <any>_gate and hidden update
        - second used for all weights from input(s) -> <any>_gate and hidden update
        or a list of two elements:
        - first: a list of hidden -> [forget gate, update gate, hidden update] initializers
        - second: a list of lists where
            list[i][0,1,2] = input[i] -> [forget gate, update gate, hidden update]

    :param <any>_nonlinearity: which nonlinearity to use for a particular gate
    :param dropout: dropout rate as per https://arxiv.org/abs/1603.05118
    :param grad_clipping: maximum gradient absolute value. 0 or None means "no clipping"

    :returns: updated memory layer
    :rtype: lasagne.layers.Layer

    for developers:
        Works by stacking other lasagne layers;
        this is a function that imitates a layer class, not an actual class.
    """
    assert len(prev_state.output_shape) == 2
    # if required, infer num_units
    if num_units is None:
        num_units = prev_state.output_shape[1]
    # else check it
    assert num_units == prev_state.output_shape[1]

    inputs = check_list(input_or_inputs)

    # handle weight init
    weight_init = check_list(weight_init)
    if len(weight_init) == 1:
        weight_init *= 2
    hidden_W_init, input_W_init = weight_init

    # hidden to gates
    hid_to_gates = GateLayer(prev_state, [num_units] * 3,
                             gate_nonlinearities=None,
                             channel_names=["to_resetgate", "to_updategate", "to_hidden_update"],
                             bias_init=None,
                             weight_init=hidden_W_init,
                             name=name or "")
    hid_forget, hid_update, hidden_update_hid = hid_to_gates.values()

    # clip grads #1
    if grad_clipping:
        inputs = [clip_grads(lyr, grad_clipping) for lyr in inputs]
        hid_forget, hid_update, hidden_update_hid = [clip_grads(lyr, grad_clipping) for lyr in
                                                     [hid_forget, hid_update, hidden_update_hid]]

    # input to gates
    inp_to_gates = GateLayer(inputs, [num_units] * 3,
                             gate_nonlinearities=None,
                             channel_names=["to_resetgate", "to_updategate", "to_hidden_update"],
                             bias_init=bias_init,
                             weight_init=input_W_init,
                             name=name or "")
    inp_forget, inp_update, hidden_update_in = inp_to_gates.values()

    # compute forget and update gates
    forgetgate = NonlinearityLayer(
        add(inp_forget, hid_forget),
        forgetgate_nonlinearity,
        name=(name or "") + ".forgetgate"
    )
    updategate = NonlinearityLayer(
        add(inp_update, hid_update),
        updategate_nonlinearity,
        name=(name or "") + ".updategate"
    )
    inv_updategate = NonlinearityLayer(updategate,
                                       lambda x: 1 - x,
                                       name=(name or "") + ".[1 - updategate]")

    # compute hidden update
    hidden_update = add(
        hidden_update_in,
        mul(forgetgate, hidden_update_hid),
        name=(name or "") + ".hid_update"
    )

    # clip grads #2
    if grad_clipping:
        hidden_update = clip_grads(hidden_update, grad_clipping)

    hidden_update = NonlinearityLayer(hidden_update,
                                      hidden_update_nonlinearity)
    if dropout != 0:
        hidden_update = DropoutLayer(hidden_update, p=dropout)

    # compute new hidden values
    new_hid = add(
        mul(inv_updategate, prev_state),
        mul(updategate, hidden_update),
        name=name
    )

    return new_hid
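
# Usage sketch for GRUCell: the new hidden state mixes the previous state and the
# candidate update through the update gate. The shapes and names below are
# illustrative assumptions; num_units is inferred from prev_state.
def _gru_cell_usage_sketch():
    from lasagne.layers import InputLayer
    prev_state = InputLayer((None, 128), name="prev_gru_state")
    observation = InputLayer((None, 10), name="observation")
    # grad_clipping bounds the absolute value of gradients flowing through the cell
    new_state = GRUCell(prev_state, observation, name="gru", grad_clipping=5.0)
    return new_state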
def LSTMCell(prev_cell,
             prev_out,
             input_or_inputs=tuple(),
             num_units=None,
             peepholes=True,
             weight_init=init.Normal(),
             bias_init=init.Constant(),
             peepholes_W_init=init.Normal(),
             forgetgate_nonlinearity=lasagne.nonlinearities.sigmoid,
             inputgate_nonlinearity=lasagne.nonlinearities.sigmoid,
             outputgate_nonlinearity=lasagne.nonlinearities.sigmoid,
             cell_nonlinearity=lasagne.nonlinearities.tanh,
             output_nonlinearity=lasagne.nonlinearities.tanh,
             dropout=0.,
             name=None,
             grad_clipping=0.,
             ):
    """
    Implements a one-step LSTM update. Note that the LSTM requires both c_t (private memory)
    and h_t (the output).

    :param prev_cell: input that denotes the previous "private" state (shape must be (None, n_units))
    :type prev_cell: lasagne.layers.Layer
    :param prev_out: input that denotes the previous "public" state (shape must be (None, n_units))
    :type prev_out: lasagne.layers.Layer
    :param input_or_inputs: a single layer or a list/tuple of layers that go as inputs
    :type input_or_inputs: lasagne.layers.Layer or list of such
    :param num_units: how many recurrent cells to use. None means "as in prev_state"
    :type num_units: int
    :param peepholes: if True, the LSTM uses peephole connections.
        When False, peepholes_W_init is ignored.
    :type peepholes: bool

    :param bias_init: either a lasagne initializer to use for all gate biases,
        or a list of 4 initializers for [input gate, forget gate, cell, output gate]
    :param weight_init: either a lasagne initializer to use for all gate weights,
        or a list of two initializers:
        - first used for all weights from hidden -> <any>_gate and cell
        - second used for all weights from input(s) -> <any>_gate and cell
        or a list of two elements:
        - first: a list of hidden -> [input gate, forget gate, cell, output gate] initializers
        - second: a list of lists where
            list[i][0,1,2,3] = input[i] -> [input gate, forget gate, cell, output gate]
    :param peepholes_W_init: either a lasagne initializer or a list of 3 initializers for
        [input gate, forget gate, output gate] weights. Ignored if peepholes=False.

    :param <any>_nonlinearity: which nonlinearity to use for a particular gate
    :param dropout: dropout rate as per https://arxiv.org/pdf/1603.05118.pdf
    :param grad_clipping: maximum gradient absolute value. 0 or None means "no clipping"

    :returns: a tuple of (new_cell, new_output) layers
    :rtype: (lasagne.layers.Layer, lasagne.layers.Layer)

    for developers:
        Works by stacking other lasagne layers;
        this is a function that imitates a layer class, not an actual class.
    """
    assert len(prev_cell.output_shape) == 2
    # if required, infer num_units
    if num_units is None:
        num_units = prev_cell.output_shape[1]
    # else check it
    assert num_units == prev_cell.output_shape[1]

    # gates and cell (before nonlinearities)
    gates = GateLayer([prev_out] + check_list(input_or_inputs),
                      [num_units] * 4,
                      channel_names=["to_ingate", "to_forgetgate", "to_cell", "to_outgate"],
                      gate_nonlinearities=None,
                      bias_init=bias_init,
                      weight_init=weight_init,
                      name=name or "")
    ingate, forgetgate, cell_input, outputgate = gates.values()

    # clip grads #1
    if grad_clipping:
        ingate, forgetgate, cell_input, outputgate = [clip_grads(lyr, grad_clipping) for lyr in
                                                      [ingate, forgetgate, cell_input, outputgate]]

    if peepholes:
        # cast peephole weight init to a list
        peepholes_W_init = check_list(peepholes_W_init)
        assert len(peepholes_W_init) in (1, 3)
        if len(peepholes_W_init) == 1:
            peepholes_W_init *= 3
        W_cell_to_ingate_init, W_cell_to_forgetgate_init = peepholes_W_init[:2]

        peep_ingate = lasagne.layers.ScaleLayer(prev_cell, W_cell_to_ingate_init, shared_axes=[0, ],
                                                name=(name or "") + ".W_cell_to_ingate_peephole")
        peep_forgetgate = lasagne.layers.ScaleLayer(prev_cell, W_cell_to_forgetgate_init, shared_axes=[0, ],
                                                    name=(name or "") + ".W_cell_to_forgetgate_peephole")

        ingate = add(ingate, peep_ingate)
        forgetgate = add(forgetgate, peep_forgetgate)

    # nonlinearities
    ingate = NonlinearityLayer(
        ingate,
        inputgate_nonlinearity,
        name=(name or "") + ".inputgate"
    )
    forgetgate = NonlinearityLayer(
        forgetgate,
        forgetgate_nonlinearity,
        name=(name or "") + ".forgetgate"
    )

    cell_input = NonlinearityLayer(cell_input,
                                   nonlinearity=cell_nonlinearity,
                                   name=(name or "") + '.cell_nonlinearity')
    if dropout != 0:
        cell_input = DropoutLayer(cell_input, p=dropout)

    # cell = input * ingate + prev_cell * forgetgate
    new_cell = add(mul(cell_input, ingate),
                   mul(prev_cell, forgetgate))

    # output gate
    if peepholes:
        W_cell_to_outgate_init = peepholes_W_init[2]
        peep_outgate = lasagne.layers.ScaleLayer(new_cell, W_cell_to_outgate_init, shared_axes=[0, ],
                                                 name=(name or "") + ".W_cell_to_outgate_peephole")
        outputgate = add(outputgate, peep_outgate)

    outputgate = NonlinearityLayer(
        outputgate,
        outputgate_nonlinearity,
        name=(name or "") + ".outgate"
    )

    # cell output
    new_output = NonlinearityLayer(new_cell,
                                   output_nonlinearity,
                                   name=(name or "") + '.outgate_nonlinearity')
    new_output = mul(
        outputgate,
        new_output,
        name=(name or "") + '.outgate'
    )

    return new_cell, new_output
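
# Usage sketch for LSTMCell: unlike RNNCell/GRUCell it tracks two states, the cell
# c_t and the output h_t, and returns both as a tuple. Shapes and names below are
# illustrative assumptions.
def _lstm_cell_usage_sketch():
    from lasagne.layers import InputLayer
    prev_cell = InputLayer((None, 256), name="prev_lstm_cell")   # c_{t-1}
    prev_out = InputLayer((None, 256), name="prev_lstm_output")  # h_{t-1}
    observation = InputLayer((None, 10), name="observation")     # x_t
    new_cell, new_out = LSTMCell(prev_cell, prev_out, observation,
                                 peepholes=True, name="lstm")
    return new_cell, new_out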