@@ -45,15 +45,15 @@ class CRF(Layer):
45
45
model = Sequential()
46
46
model.add(Embedding(3001, 300, mask_zero=True))
47
47
48
- # use learn_mode = 'join', test_mode = 'viterbi'
49
- crf = CRF(10)
50
- model.add(crf(Embed) )
48
+ # use learn_mode = 'join', test_mode = 'viterbi', sparse_target = True (targets given as label indices)
49
+ crf = CRF(10, sparse_target=True )
50
+ model.add(crf)
51
51
52
52
# crf.accuracy defaults to Viterbi accuracy when using join mode (the default).
53
53
# One can also add crf.marginal_acc if interested, but it may slow down learning.
54
54
model.compile('adam', loss=crf.loss_function, metrics=[crf.accuracy])
55
55
56
- # y can be either onehot representation or label indices (with shape 1 at dim 3)
56
+ # y must be label indices (with shape 1 at dim 3) here, since `sparse_target=True`
57
57
model.fit(x, y)
58
58
59
59
# prediction gives the one-hot representation of the Viterbi best path
@@ -331,16 +331,16 @@ def get_logZ(self, in_energy, mask):
331
331
def get_energy (self , y_true , in_energy , mask ):
332
332
'''Energy = a1' y1 + u1' y1 + y1' U y2 + u2' y2 + y2' U y3 + u3' y3 + an' y3
333
333
'''
334
- in_energy = K .sum (in_energy * y_true , 2 ) # (B, T)
334
+ in_energy = K .sum (in_energy * y_true , 2 ) # (B, T)
335
335
chain_energy = K .sum (K .dot (y_true [:, :- 1 , :], self .U ) * y_true [:, 1 :, :], 2 ) # (B, T-1)
336
336
chain_energy = self .chain_activation (chain_energy )
337
337
338
338
if mask is not None :
339
339
mask = K .cast (mask , K .floatx ())
340
- chain_mask = mask [:, :- 1 ] * mask [:, 1 :] # (B, T-1), mask[:,:-1]*mask[:,1:] makes it work with any padding
340
+ chain_mask = mask [:, :- 1 ] * mask [:, 1 :] # (B, T-1), mask[:,:-1]*mask[:,1:] makes it work with any padding
341
341
in_energy = in_energy * mask
342
342
chain_energy = chain_energy * chain_mask
343
- total_energy = K .sum (in_energy , - 1 ) + K .sum (chain_energy , - 1 ) # (B, )
343
+ total_energy = K .sum (in_energy , - 1 ) + K .sum (chain_energy , - 1 ) # (B, )
344
344
345
345
return total_energy
346
346
@@ -368,19 +368,19 @@ def step(self, in_energy_t, states, return_logZ=True):
368
368
t = K .cast (i [0 , 0 ], dtype = 'int32' )
369
369
if len (states ) > 3 :
370
370
if K ._BACKEND == 'theano' :
371
- m = states [3 ][:, t :(t + 2 )]
371
+ m = states [3 ][:, t :(t + 2 )]
372
372
else :
373
373
m = tf .slice (states [3 ], [0 , t ], [- 1 , 2 ])
374
374
in_energy_t = in_energy_t * K .expand_dims (m [:, 0 ])
375
375
chain_energy = chain_energy * K .expand_dims (K .expand_dims (m [:, 0 ] * m [:, 1 ])) # (1, F, F)*(B, 1, 1) -> (B, F, F)
376
376
if return_logZ :
377
- energy = chain_energy + K .expand_dims (in_energy_t - prev_target_val , 2 ) # shapes: (1, B, F) + (B, F, 1) -> (B, F, F)
378
- new_target_val = self .log_sum_exp (- energy , 1 ) # shapes: (B, F)
377
+ energy = chain_energy + K .expand_dims (in_energy_t - prev_target_val , 2 ) # shapes: (1, F, F) + (B, F, 1) -> (B, F, F)
378
+ new_target_val = self .log_sum_exp (- energy , 1 ) # shapes: (B, F)
379
379
return new_target_val , [new_target_val , i + 1 ]
380
380
else :
381
381
energy = chain_energy + K .expand_dims (in_energy_t + prev_target_val , 2 )
382
382
min_energy = K .min (energy , 1 )
383
- argmin_table = K .cast (K .argmin (energy , 1 ), K .floatx ()) # cast for tf-version `K.rnn`
383
+ argmin_table = K .cast (K .argmin (energy , 1 ), K .floatx ()) # cast for tf-version `K.rnn`
384
384
return argmin_table , [min_energy , i + 1 ]
385
385
386
386
def recursion (self , in_energy , mask = None , go_backwards = False , return_sequences = True , return_logZ = True ):
@@ -403,8 +403,8 @@ def recursion(self, in_energy, mask=None, go_backwards=False, return_sequences=T
403
403
If `return_logZ = False`, compute the Viterbi's best path lookup table.
404
404
'''
405
405
chain_energy = self .chain_activation (self .U )
406
- chain_energy = K .expand_dims (chain_energy , 0 ) # shape=(1, F, F): F=num of output features. 1st F is for t-1, 2nd F for t
407
- prev_target_val = K .zeros_like (in_energy [:, 0 , :]) # shape=(B, F), dtype=float32
406
+ chain_energy = K .expand_dims (chain_energy , 0 ) # shape=(1, F, F): F=num of output features. 1st F is for t-1, 2nd F for t
407
+ prev_target_val = K .zeros_like (in_energy [:, 0 , :]) # shape=(B, F), dtype=float32
408
408
409
409
if go_backwards :
410
410
in_energy = K .reverse (in_energy , 1 )
@@ -458,7 +458,7 @@ def viterbi_decoding(self, X, mask=None):
458
458
459
459
# backward pass to find the best path; `initial_best_idx` can be any index, as all elements in the last argmin_table are the same
460
460
argmin_tables = K .reverse (argmin_tables , 1 )
461
- initial_best_idx = [K .expand_dims (argmin_tables [:, 0 , 0 ])] # matrix instead of vector is required by tf `K.rnn`
461
+ initial_best_idx = [K .expand_dims (argmin_tables [:, 0 , 0 ])] # matrix instead of vector is required by tf `K.rnn`
462
462
463
463
def gather_each_row (params , indices ):
464
464
n = K .shape (indices )[0 ]
0 commit comments