
Commit c70505b

update per comments
szha committed Aug 22, 2017
1 parent 946b90e commit c70505b
Showing 4 changed files with 130 additions and 136 deletions.
105 changes: 47 additions & 58 deletions python/mxnet/gluon/loss.py
@@ -306,93 +306,82 @@ class CTCLoss(Loss):
     Sequence Data with Recurrent Neural Networks"
     <http://www.cs.toronto.edu/~graves/icml_2006.pdf>`_ paper for more information.
-    The prediction output should be an activation vector without softmax, with shape
-    according to the output_layout:
-    **TNC**: *(sequence_length, batch_size, alphabet_size + 1)*
-    **NTC**: *(batch_size, sequence_length, alphabet_size + 1)*
-    The loss output has the shape:
-    **loss**: *(batch_size,)*.
-    ``label`` is a tensor of integers between 1 and *alphabet_size*, with shape according
-    to the label_layout:
-    **NT**: *(batch_size, label_sequence_length)*
-    **TN**: *(label_sequence_length, batch_size)*
-    If a sequence of labels is shorter than *label_sequence_length*, use the special
-    padding character 0 at the end of the sequence to conform it to the correct
-    length. For example, if *label_sequence_length* = 4, and one has two sequences
-    of labels [2, 1] and [3, 2, 2], the resulting ```label``` tensor should be
-    padded to be::
-        [[2, 1, 0, 0], [3, 2, 2, 0]]
     Parameters
     ----------
-    output_layout : str, default 'NTC'
+    layout : str, default 'NTC'
         Layout of the output sequence activation vector.
     label_layout : str, default 'NT'
         Layout of the labels.
-    use_input_lengths : bool, default False
-        Whether to use `input_lengths` to decide lengths of inputs.
-        If false, the input lengths are treated as being equal to the max sequence length.
-    use_label_lengths : bool, default False
-        Whether to use `label_lengths` to decide lengths of labels.
-        If false, the label lengths are derived from the first occurrence of
-        the value specified by `padding_mask`.
     padding_mask : int or None, default -1
         This is the label value to be considered padding, which is used to derive the actual
-        lengths of labels. Only required when `use_label_lengths` is false.
+        lengths of labels. Only required when `label_lengths` is None.
     weight : float or None
         Global scalar weight for loss.
-    input_lengths : NDArray or None,
-        Actual lengths of inputs. Only required when `use_input_lengths` is true.
-        This should be used as the third argument when calling this loss.
-        The shape should be (N,).
-    label_lengths : NDArray or None,
-        Lengths of labels. Only required when `use_label_lengths` is true.
-        This should be used as the fourth argument when calling this loss.
-        The shape should be (N,).
     sample_weight : Symbol or None
         Per sample weighting. Must be broadcastable to
         the same shape as loss. For example, if loss has
         shape (64, 10) and you want to weight each sample
         in the batch, `sample_weight` should have shape (64, 1).
         This should be used as the fifth argument when calling this loss.
+    Input shapes:
+        `data` is an activation tensor without softmax.
+        Its shape depends on `layout`. For `layout='TNC'`, this
+        input has shape `(sequence_length, batch_size, alphabet_size)`
+        `label` is the label index matrix.
+        Its shape depends on `label_layout`. For `label_layout='TN'`, this
+        input has shape `(label_sequence_length, batch_size)`
+        When `label_lengths` is not specified, the first occurrence of `padding_mask`
+        in each sample marks the end of the label sequence of that sample.
+        For example, suppose there are two samples, with *label_sequence_length* = 4.
+        The two sequences of labels are [2, 1] and [3, 2, 2], and their actual lengths
+        are smaller than 4. Thus, given *padding_mask* = 0, the resulting ```label```
+        tensor should be padded to be::
+            [[2, 1, 0, 0], [3, 2, 2, 0]]
+        `data_lengths` is optional and defaults to None.
+        When specified, it represents the actual lengths of data.
+        The shape should be (batch_size,).
+        If None, the data lengths are treated as being equal to the max sequence length.
+        This should be used as the third argument when calling this loss.
+        `label_lengths` is optional and defaults to None.
+        When specified, it represents the actual lengths of labels.
+        The shape should be (batch_size,).
+        If None, the label lengths are derived from the first occurrence of
+        the value specified by `padding_mask`.
+        This should be used as the fourth argument when calling this loss.
+    Output shape:
+        The CTC loss output has the shape (batch_size,).
     """
-    def __init__(self, output_layout='NTC', label_layout='NT',
-                 use_input_lengths=False, use_label_lengths=False, padding_mask=-1,
+    def __init__(self, layout='NTC', label_layout='NT', padding_mask=-1,
                  weight=None, **kwargs):
-        assert output_layout in ['NTC', 'TNC'],\
-            "Only 'NTC' and 'TNC' layouts for output are supported. Got: %s"%output_layout
+        assert layout in ['NTC', 'TNC'],\
+            "Only 'NTC' and 'TNC' layouts for output are supported. Got: %s"%layout
         assert label_layout in ['NT', 'TN'],\
             "Only 'NT' and 'TN' layouts for label are supported. Got: %s"%label_layout
-        self._output_layout = output_layout
+        self._layout = layout
         self._label_layout = label_layout
-        self._use_input_lengths = use_input_lengths
-        self._use_label_lengths = use_label_lengths
        self._padding_mask = padding_mask
        batch_axis = label_layout.find('N')
        super(CTCLoss, self).__init__(weight, batch_axis, **kwargs)

-    def hybrid_forward(self, F, output, label,
-                       input_lengths=None, label_lengths=None, sample_weight=None):
-        assert not self._use_input_lengths or input_lengths is not None, \
-            "Must specify input_lengths."
-        assert not self._use_label_lengths or label_lengths is not None, \
-            "Must specify label_lengths."
-        if self._output_layout == 'NTC':
-            output = F.swapaxes(output, 0, 1)
+    def hybrid_forward(self, F, data, label,
+                       data_lengths=None, label_lengths=None, sample_weight=None):
+        if self._layout == 'NTC':
+            data = F.swapaxes(data, 0, 1)
        if self._batch_axis == 1:
            label = F.swapaxes(label, 0, 1)
        if F is ndarray:
            F_contrib = ndarray_contrib
        else:
            F_contrib = symbol_contrib
-        loss = F_contrib.CTCLoss(output, label,
-                                 use_input_lengths=self._use_input_lengths,
-                                 use_label_lengths=self._use_label_lengths,
-                                 input_lengths=input_lengths, label_lengths=label_lengths,
+        loss = F_contrib.CTCLoss(data, label,
+                                 use_data_lengths=data_lengths is not None,
+                                 use_label_lengths=label_lengths is not None,
+                                 data_lengths=data_lengths, label_lengths=label_lengths,
                                 padding_mask=self._padding_mask)
        return _apply_weighting(F, loss, self._weight, sample_weight)
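
Taken together, the diff renames `output`/`output_layout`/`input_lengths` to `data`/`layout`/`data_lengths` and drops the `use_input_lengths`/`use_label_lengths` constructor flags in favor of presence checks on the optional length arguments. A minimal usage sketch of the revised API, assuming an MXNet build that includes this commit; the shapes and label values below are illustrative, taken from the docstring's padding example:

import mxnet as mx
from mxnet.gluon.loss import CTCLoss

# padding_mask=0 matches the docstring example, where 0 pads short label sequences.
loss_fn = CTCLoss(layout='NTC', label_layout='NT', padding_mask=0)

batch_size, seq_len, alphabet_size = 2, 10, 5
# Activations without softmax; layout 'NTC' means (batch_size, seq_len, alphabet_size + 1).
data = mx.nd.random.uniform(shape=(batch_size, seq_len, alphabet_size + 1))
# Label sequences [2, 1] and [3, 2, 2], padded with 0 to label_sequence_length = 4.
label = mx.nd.array([[2, 1, 0, 0],
                     [3, 2, 2, 0]])

# Label lengths are derived from the first occurrence of padding_mask.
loss = loss_fn(data, label)
print(loss.shape)  # (2,) -- one CTC loss value per sample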
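When samples in a batch have different valid lengths, the optional third and fourth call arguments can be passed explicitly instead of relying on `padding_mask`; per the new `hybrid_forward`, supplying them switches on `use_data_lengths`/`use_label_lengths` in the underlying contrib operator. A sketch under the same assumptions:

# Explicit lengths: sample 0 has 10 valid time steps and 2 labels,
# sample 1 has 8 valid time steps and 3 labels.
data_lengths = mx.nd.array([10, 8])
label_lengths = mx.nd.array([2, 3])

# data_lengths is the third argument and label_lengths the fourth,
# as documented in the new "Input shapes" section of the docstring.
loss = loss_fn(data, label, data_lengths, label_lengths)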
