Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion egs/swbd/s5c/local/chain/tuning/run_tdnn_7o.sh
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
#
#
# local/chain/compare_wer_general.sh --rt03 tdnn7n_sp tdnn7m26o_sp
# System tdnn7n_sp tdnn7m26j_sp
# System tdnn7n_sp tdnn7m26o_sp
# WER on train_dev(tg) 12.18 11.74
# WER on train_dev(fg) 11.12 10.69
# WER on eval2000(tg) 14.9 14.6
Expand Down
5 changes: 4 additions & 1 deletion src/chain/chain-denominator.cc
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,10 @@ DenominatorComputation::DenominatorComputation(
nnet_output.NumRows(),
kUndefined, kStrideEqualNumCols);
exp_nnet_output_transposed_.CopyFromMat(nnet_output, kTrans);
exp_nnet_output_transposed_.ApplyExp();
// We limit the nnet output to the range [-30,30] before doing the exp;
// this avoids NaNs appearing in the forward-backward computation, which
// is not done in log space.
exp_nnet_output_transposed_.ApplyExpLimited(-30.0, 30.0);
}


Expand Down
4 changes: 4 additions & 0 deletions src/cudamatrix/cu-kernels-ansi.h
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,10 @@ void cudaF_apply_ceiling(dim3 Gr, dim3 Bl, float* mat, float ceiling_val,
MatrixDim d);
void cudaD_apply_exp(dim3 Gr, dim3 Bl, double* mat, MatrixDim d);
void cudaF_apply_exp(dim3 Gr, dim3 Bl, float* mat, MatrixDim d);
void cudaD_apply_exp_limited(dim3 Gr, dim3 Bl, double* mat, MatrixDim d,
double lower_limit, double upper_limit);
void cudaF_apply_exp_limited(dim3 Gr, dim3 Bl, float* mat, MatrixDim d,
float lower_limit, float upper_limit);
void cudaD_apply_exp_special(dim3 Gr, dim3 Bl, double* out, MatrixDim out_dim,
const double* in, int in_stride);
void cudaF_apply_exp_special(dim3 Gr, dim3 Bl, float* out, MatrixDim out_dim,
Expand Down
32 changes: 32 additions & 0 deletions src/cudamatrix/cu-kernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -400,6 +400,26 @@ static void _apply_exp(Real* mat, MatrixDim d) {
}
}

// In-place computes mat[i] = exp(clamp(mat[i], lower_limit, upper_limit))
// for every element of the matrix.  Clamping before the exp keeps the result
// finite, and the comparison is written so that NaN inputs are also mapped
// to lower_limit (NaN fails every comparison).
template<typename Real>
__global__
static void _apply_exp_limited(Real* mat, MatrixDim d,
                               Real lower_limit, Real upper_limit) {
  int32_cuda col = blockIdx.x * blockDim.x + threadIdx.x;
  int32_cuda row = blockIdx.y * blockDim.y + threadIdx.y;
  if (col < d.cols && row < d.rows) {
    int32_cuda index = col + row * d.stride;
    Real x = mat[index];
    // !(x >= lower_limit) is deliberately not (x < lower_limit): NaN compares
    // false with everything, so NaN also takes this branch and becomes
    // lower_limit instead of propagating through exp().
    Real clamped = !(x >= lower_limit) ? lower_limit
                                       : (x > upper_limit ? upper_limit : x);
    mat[index] = exp(clamped);
  }
}


template<typename Real>
__global__
static void _scale_diag_packed(Real* mat, Real value, int dim) {
Expand Down Expand Up @@ -3734,6 +3754,11 @@ void cudaF_apply_exp(dim3 Gr, dim3 Bl, float* mat, MatrixDim d) {
_apply_exp<<<Gr,Bl>>>(mat,d);
}

// Float instantiation of the exp-limited launcher: clamps each element of
// 'mat' to [lower_limit, upper_limit] (NaN -> lower_limit) and then
// exponentiates it in place.  Error checking is the caller's responsibility.
void cudaF_apply_exp_limited(dim3 Gr, dim3 Bl, float* mat, MatrixDim d,
                             float lower_limit, float upper_limit) {
  _apply_exp_limited<<<Gr,Bl>>>(mat, d, lower_limit, upper_limit);
}

void cudaF_apply_pow(dim3 Gr, dim3 Bl, float* mat, float power, MatrixDim d) {
_apply_pow<<<Gr,Bl>>>(mat, power, d);
}
Expand Down Expand Up @@ -4430,6 +4455,13 @@ void cudaD_apply_exp(dim3 Gr, dim3 Bl, double* mat, MatrixDim d) {
_apply_exp<<<Gr,Bl>>>(mat,d);
}

// Double instantiation of the exp-limited launcher: clamps each element of
// 'mat' to [lower_limit, upper_limit] (NaN -> lower_limit) and then
// exponentiates it in place.  Error checking is the caller's responsibility.
void cudaD_apply_exp_limited(dim3 Gr, dim3 Bl, double* mat, MatrixDim d,
                             double lower_limit, double upper_limit) {
  _apply_exp_limited<<<Gr,Bl>>>(mat, d, lower_limit, upper_limit);
}



void cudaD_apply_pow(dim3 Gr, dim3 Bl, double* mat, double power, MatrixDim d) {
_apply_pow<<<Gr,Bl>>>(mat, power, d);
}
Expand Down
8 changes: 8 additions & 0 deletions src/cudamatrix/cu-kernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -345,6 +345,14 @@ inline void cuda_apply_exp(dim3 Gr, dim3 Bl, double* mat, MatrixDim d) {
inline void cuda_apply_exp(dim3 Gr, dim3 Bl, float* mat, MatrixDim d) {
cudaF_apply_exp(Gr, Bl, mat, d);
}
// Precision-dispatch overload: forwards to the double-precision
// exp-limited kernel launcher.
inline void cuda_apply_exp_limited(dim3 Gr, dim3 Bl, double* mat, MatrixDim d,
                                   double lower_limit, double upper_limit) {
  cudaD_apply_exp_limited(Gr, Bl, mat, d, lower_limit, upper_limit);
}
// Precision-dispatch overload: forwards to the single-precision
// exp-limited kernel launcher.
inline void cuda_apply_exp_limited(dim3 Gr, dim3 Bl, float* mat, MatrixDim d,
                                   float lower_limit, float upper_limit) {
  cudaF_apply_exp_limited(Gr, Bl, mat, d, lower_limit, upper_limit);
}
inline void cuda_apply_exp_special(dim3 Gr, dim3 Bl, double* out,
MatrixDim out_dim, const double* in,
int in_stride) {
Expand Down
25 changes: 25 additions & 0 deletions src/cudamatrix/cu-matrix-test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,30 @@ static void UnitTestCuMatrixApplyExp() {
}


template<typename Real>
static void UnitTestCuMatrixApplyExpLimited() {
int32 M = 10 + Rand() % 20, N = 10 + Rand() % 20;
Matrix<Real> H(M, N);
H.SetRandn();


BaseFloat lower_limit = -0.2, upper_limit = 0.2;

CuMatrix<Real> D(H);

D.ApplyExpLimited(lower_limit, upper_limit);


H.ApplyFloor(lower_limit);
H.ApplyCeiling(upper_limit);
H.ApplyExp();

Matrix<Real> H2(D);

AssertEqual(H,H2);
}



template<typename Real>
static void UnitTestCuMatrixSigmoid() {
Expand Down Expand Up @@ -2895,6 +2919,7 @@ static void UnitTestCuMatrixEqualElementMask() {

template<typename Real> void CudaMatrixUnitTest() {
UnitTestCuMatrixApplyExpSpecial<Real>();
UnitTestCuMatrixApplyExpLimited<Real>();
UnitTextCuMatrixAddSmatMat<Real>();
UnitTextCuMatrixAddMatSmat<Real>();
UnitTextCuMatrixAddSmat<Real>();
Expand Down
31 changes: 31 additions & 0 deletions src/cudamatrix/cu-matrix.cc
Original file line number Diff line number Diff line change
Expand Up @@ -2498,6 +2498,37 @@ void CuMatrixBase<Real>::ApplyExp() {
}
}

// Sets each element x of the matrix to Exp(y), where y is x clamped to
// [lower_limit, upper_limit].  Equivalent to ApplyFloor(lower_limit);
// ApplyCeiling(upper_limit); ApplyExp(), but done in a single pass; the
// clamp also maps NaN inputs to lower_limit.
template<typename Real>
void CuMatrixBase<Real>::ApplyExpLimited(Real lower_limit, Real upper_limit) {
  KALDI_ASSERT(upper_limit > lower_limit);
#if HAVE_CUDA == 1
  if (CuDevice::Instantiate().Enabled()) {
    CuTimer tim;
    dim3 dimGrid, dimBlock;
    GetBlockSizesForSimpleMatrixOperation(NumRows(), NumCols(),
                                          &dimGrid, &dimBlock);
    cuda_apply_exp_limited(dimGrid, dimBlock, data_, Dim(), lower_limit,
                           upper_limit);
    CU_SAFE_CALL(cudaGetLastError());
    CuDevice::Instantiate().AccuProfile(__func__, tim);
  } else
#endif
  {
    // CPU fallback: same clamp-then-exp, row by row.
    for (int32 row = 0; row < num_rows_; row++) {
      Real *this_row = this->RowData(row);
      for (int32 col = 0; col < num_cols_; col++) {
        Real val = this_row[col];
        // !(val >= lower_limit) rather than (val < lower_limit) so that
        // NaN (which fails all comparisons) is also floored.
        if (!(val >= lower_limit)) {
          val = lower_limit;
        } else if (val > upper_limit) {
          val = upper_limit;
        }
        this_row[col] = Exp(val);
      }
    }
  }
}


template<typename Real>
void CuMatrixBase<Real>::ApplyExpSpecial() {
#if HAVE_CUDA == 1
Expand Down
7 changes: 7 additions & 0 deletions src/cudamatrix/cu-matrix.h
Original file line number Diff line number Diff line change
Expand Up @@ -399,6 +399,13 @@ class CuMatrixBase {
void ApplyCeiling(Real ceiling_val);
void ApplyExp();


/// This is equivalent to running:
/// ApplyFloor(lower_limit);
/// ApplyCeiling(upper_limit);
/// ApplyExp()
void ApplyExpLimited(Real lower_limit, Real upper_limit);

/// For each element x of the matrix, set it to
/// (x < 0 ? exp(x) : x + 1). This function is used
/// in our RNNLM training.
Expand Down
2 changes: 1 addition & 1 deletion src/nnet3/nnet-general-component.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1414,7 +1414,7 @@ void* DropoutMaskComponent::Propagate(
BaseFloat dropout_proportion = dropout_proportion_;
KALDI_ASSERT(dropout_proportion >= 0.0 && dropout_proportion <= 1.0);

if (dropout_proportion_ == 0) {
if (dropout_proportion == 0) {
out->Set(1.0);
return NULL;
}
Expand Down
6 changes: 1 addition & 5 deletions src/nnet3/nnet-simple-component.cc
Original file line number Diff line number Diff line change
Expand Up @@ -3730,15 +3730,11 @@ void NaturalGradientPerElementScaleComponent::InitFromConfig(ConfigLine *cfl) {
// for the preconditioner actually exceeds the memory for the
// parameters (by "rank").
update_period = 10;
BaseFloat num_samples_history = 2000.0, alpha = 4.0,
max_change_per_minibatch = 0.0;
BaseFloat num_samples_history = 2000.0, alpha = 4.0;
cfl->GetValue("rank", &rank);
cfl->GetValue("update-period", &update_period);
cfl->GetValue("num-samples-history", &num_samples_history);
cfl->GetValue("alpha", &alpha);
cfl->GetValue("max-change-per-minibatch", &max_change_per_minibatch);
if (max_change_per_minibatch != 0.0)
KALDI_WARN << "max-change-per-minibatch is now ignored, use 'max-change'";
InitLearningRatesFromConfig(cfl);
std::string filename;
// Accepts "scales" config (for filename) or "dim" -> random init, for testing.
Expand Down
38 changes: 36 additions & 2 deletions src/nnet3/nnet-simple-component.h
Original file line number Diff line number Diff line change
Expand Up @@ -1446,6 +1446,19 @@ class PermuteComponent: public Component {
trainable scale; it's like a linear component with a diagonal matrix. This
version (and its child class NaturalGradientPerElementScaleComponent)
requires the input for backprop. See also ScaleAndOffsetComponent.

Accepted values on its config line, with defaults if applicable:

vector If specified, the scales will be read from this file ('vector'
is interpreted as an rxfilename).

dim The dimension that this component inputs and outputs.
Only required if 'vector' is not specified.

param-mean=1.0 Mean of the randomly initialized scale parameters; should only
be supplied if 'vector' is not supplied.
param-stddev=0.0 Standard deviation of the randomly initialized scale parameters;
should only be supplied if 'vector' is not supplied.
*/
class PerElementScaleComponent: public UpdatableComponent {
public:
Expand Down Expand Up @@ -1670,8 +1683,29 @@ class ConstantFunctionComponent: public UpdatableComponent {



// NaturalGradientPerElementScaleComponent is like PerElementScaleComponent but
// it uses a natural gradient update for the per-element scales.
/**
NaturalGradientPerElementScaleComponent is like PerElementScaleComponent but
it uses a natural gradient update for the per-element scales.

Accepted values on its config line, with defaults if applicable:

vector If specified, the scales will be read from this file ('vector'
is interpreted as an rxfilename).

dim The dimension that this component inputs and outputs.
Only required if 'vector' is not specified.

param-mean=1.0 Mean of the randomly initialized scale parameters; should only
be supplied if 'vector' is not supplied.
param-stddev=0.0 Standard deviation of the randomly initialized scale parameters;
should only be supplied if 'vector' is not supplied.

And the natural-gradient-related configuration values:
rank=8
update-period=10
num-samples-history=2000.0
alpha=4.0
*/
class NaturalGradientPerElementScaleComponent: public PerElementScaleComponent {
public:

Expand Down