@@ -76,12 +76,10 @@ def prepare_initial_acoustic_model(dir, alidir, run_opts,
     common_train_lib.prepare_initial_network(dir, run_opts,
                                              srand=srand)

-    # Convert to .mdl, train the transitions, set the priors.
+    # Convert to .mdl, set the priors.
     common_lib.execute_command(
         """{command} {dir}/log/init_mdl.log \
-                nnet3-am-init {alidir}/final.mdl {raw_mdl} - \| \
-                nnet3-am-train-transitions - \
-                    "ark:gunzip -c {alidir}/ali.*.gz|" {dir}/0.mdl
+                nnet3-am-init {alidir}/final.mdl {raw_mdl} {dir}/0.mdl
         """.format(command=run_opts.command,
                    dir=dir, alidir=alidir,
                    raw_mdl=(input_model if input_model is not None
5 changes: 3 additions & 2 deletions src/nnet3/Makefile
@@ -31,7 +31,8 @@ OBJFILES = nnet-common.o nnet-compile.o nnet-component-itf.o \
            nnet-compile-looped.o decodable-simple-looped.o \
            decodable-online-looped.o convolution.o \
            nnet-convolutional-component.o attention.o \
-           nnet-attention-component.o nnet-tdnn-component.o nnet-batch-compute.o
+           nnet-attention-component.o nnet-tdnn-component.o nnet-batch-compute.o \
+           get-feature-transform.o


LIBNAME = kaldi-nnet3
@@ -41,6 +42,6 @@ ADDLIBS = ../chain/kaldi-chain.a ../cudamatrix/kaldi-cudamatrix.a \
           ../fstext/kaldi-fstext.a ../hmm/kaldi-hmm.a \
           ../transform/kaldi-transform.a ../gmm/kaldi-gmm.a \
           ../tree/kaldi-tree.a ../util/kaldi-util.a ../matrix/kaldi-matrix.a ../cblasext/kaldi-cblasext.a \
-          ../base/kaldi-base.a
+          ../base/kaldi-base.a

include ../makefiles/default_rules.mk
203 changes: 203 additions & 0 deletions src/nnet3/get-feature-transform.cc
@@ -0,0 +1,203 @@
// nnet3/get-feature-transform.cc

// Copyright 2009-2011 Jan Silovsky
// 2013 Johns Hopkins University (author: Daniel Povey)

// See ../../COPYING for clarification regarding multiple authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// THIS CODE IS PROVIDED ON AN *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS
// OF ANY KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY
// IMPLIED WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR
// PURPOSE, MERCHANTABILITY OR NON-INFRINGEMENT.
// See the Apache 2 License for the specific language governing permissions and
// limitations under the License.


#include "nnet3/get-feature-transform.h"

namespace kaldi {



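// Computes the feature transform M (and, if C is non-NULL, the Cholesky
// factor of the within-class covariance) from the class statistics
// accumulated in this object.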
void FeatureTransformEstimate::Estimate(
    const FeatureTransformEstimateOptions &opts,
    Matrix<BaseFloat> *M,
    TpMatrix<BaseFloat> *C) const {
double count;
Vector<double> total_mean;
SpMatrix<double> total_covar, between_covar;
GetStats(&total_covar, &between_covar, &total_mean, &count);
KALDI_LOG << "Data count is " << count;
EstimateInternal(opts, total_covar, between_covar, total_mean, M, C);
}

// static
void FeatureTransformEstimate::EstimateInternal(
const FeatureTransformEstimateOptions &opts,
const SpMatrix<double> &total_covar,
const SpMatrix<double> &between_covar,
const Vector<double> &total_mean,
Matrix<BaseFloat> *M,
TpMatrix<BaseFloat> *C) {

int32 target_dim = opts.dim, dim = total_covar.NumRows();
// Interpret zero or negative target_dim as the full dim
if (target_dim <= 0)
target_dim = dim;
  // The between-class covariance has rank at most C-1, where C is the
  // number of classes.
KALDI_ASSERT(target_dim <= dim);

// within-class covariance
SpMatrix<double> wc_covar(total_covar);
wc_covar.AddSp(-1.0, between_covar);
TpMatrix<double> wc_covar_sqrt(dim);
try {
wc_covar_sqrt.Cholesky(wc_covar);
if (C != NULL) {
C->Resize(dim);
C->CopyFromTp(wc_covar_sqrt);
}
} catch (...) {
BaseFloat smooth = 1.0e-03 * wc_covar.Trace() / wc_covar.NumRows();
KALDI_LOG << "Cholesky failed (possibly not +ve definite), so adding " << smooth
<< " to diagonal and trying again.\n";
for (int32 i = 0; i < wc_covar.NumRows(); i++)
wc_covar(i, i) += smooth;
wc_covar_sqrt.Cholesky(wc_covar);
}
Matrix<double> wc_covar_sqrt_mat(wc_covar_sqrt);
wc_covar_sqrt_mat.Invert();

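  // Map the between-class covariance into the space where the within-class
  // covariance is the identity: tmp_sp = L^{-1} B L^{-T}, where W = L L^T.
  // Its sorted eigenvalues are the between/within variance ratios that the
  // LDA directions are ranked by.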
SpMatrix<double> tmp_sp(dim);
tmp_sp.AddMat2Sp(1.0, wc_covar_sqrt_mat, kNoTrans, between_covar, 0.0);
Matrix<double> tmp_mat(tmp_sp);
Matrix<double> svd_u(dim, dim), svd_vt(dim, dim);
Vector<double> svd_d(dim);
tmp_mat.Svd(&svd_d, &svd_u, &svd_vt);
SortSvd(&svd_d, &svd_u);

KALDI_LOG << "LDA singular values are " << svd_d;

KALDI_LOG << "Sum of all singular values is " << svd_d.Sum();
KALDI_LOG << "Sum of selected singular values is " <<
SubVector<double>(svd_d, 0, target_dim).Sum();

Matrix<double> lda_mat(dim, dim);
lda_mat.AddMatMat(1.0, svd_u, kTrans, wc_covar_sqrt_mat, kNoTrans, 0.0);

  // Finally, copy the first target_dim rows to M.
M->Resize(target_dim, dim);
M->CopyFromMat(lda_mat.Range(0, target_dim, 0, dim));

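  // In the projected space each dimension has within-class variance 1.0 and
  // between-class variance svd_d(i), so its total variance is 1 + svd_d(i).
  // Rescale so the total becomes opts.within_class_factor + svd_d(i), as if
  // the within-class variance were opts.within_class_factor; for values < 1
  // this shrinks the dimensions with little between-class variance the most.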
if (opts.within_class_factor != 1.0) {
for (int32 i = 0; i < svd_d.Dim(); i++) {
BaseFloat old_var = 1.0 + svd_d(i), // the total variance of that dim..
new_var = opts.within_class_factor + svd_d(i), // the variance we want..
scale = sqrt(new_var / old_var);
if (i < M->NumRows())
M->Row(i).Scale(scale);
}
}

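  // Optionally apply a ceiling to the singular values of M and reconstruct
  // it, so that no direction of the input is amplified by more than
  // opts.max_singular_value.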
if (opts.max_singular_value > 0.0) {
int32 rows = M->NumRows(), cols = M->NumCols(),
min_dim = std::min(rows, cols);
Matrix<BaseFloat> U(rows, min_dim), Vt(min_dim, cols);
Vector<BaseFloat> s(min_dim);
    M->Svd(&s, &U, &Vt);  // decompose M = U diag(s) Vt.
BaseFloat max_s = s.Max();
int32 n;
s.ApplyCeiling(opts.max_singular_value, &n);
if (n > 0) {
KALDI_LOG << "Applied ceiling to " << n << " out of " << s.Dim()
<< " singular values of transform using ceiling "
<< opts.max_singular_value << ", max is " << max_s;
Vt.MulRowsVec(s);
// reconstruct m with the modified singular values:
M->AddMatMat(1.0, U, kNoTrans, Vt, kNoTrans, 0.0);
}
}

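  // Optionally append a column to M that subtracts the (projected) global
  // mean, making the output zero-mean; inputs then need a trailing 1.0
  // appended.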
if (opts.remove_offset)
AddMeanOffset(total_mean, M);
}

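// Estimates a transform on just the feature dimensions listed in 'indexes':
// the global statistics are projected onto those dimensions, the same
// estimation as above is run, and the result is mapped back to the full
// input dimension.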
void FeatureTransformEstimateMulti::EstimateTransformPart(
const FeatureTransformEstimateOptions &opts,
const std::vector<int32> &indexes,
const SpMatrix<double> &total_covar,
const SpMatrix<double> &between_covar,
const Vector<double> &mean,
Matrix<BaseFloat> *M) const {

int32 full_dim = Dim(), proj_dim = indexes.size();
Matrix<double> transform(proj_dim, full_dim); // projects from full to projected dim.
for (int32 i = 0; i < proj_dim; i++)
transform(i, indexes[i]) = 1.0;

SpMatrix<double> total_covar_proj(proj_dim), between_covar_proj(proj_dim);
Vector<double> mean_proj(proj_dim);
total_covar_proj.AddMat2Sp(1.0, transform, kNoTrans, total_covar, 0.0);
between_covar_proj.AddMat2Sp(1.0, transform, kNoTrans, between_covar, 0.0);
mean_proj.AddMatVec(1.0, transform, kNoTrans, mean, 0.0);

Matrix<BaseFloat> M_proj;
FeatureTransformEstimateOptions opts_tmp(opts);
opts_tmp.dim = proj_dim;
EstimateInternal(opts_tmp, total_covar_proj, between_covar_proj, mean_proj,
&M_proj, NULL);
if (M_proj.NumCols() == proj_dim + 1) { // Extend transform to add the extra "1" that we
// use to handle mean shifts..
transform.Resize(proj_dim + 1, full_dim + 1, kCopyData);
transform(proj_dim, full_dim) = 1.0;
}
M->Resize(proj_dim, transform.NumCols());
// Produce output..
M->AddMatMat(1.0, M_proj, kNoTrans, Matrix<BaseFloat>(transform),
kNoTrans, 0.0);
}

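// Stacks one transform per index set into a single matrix M, so that each
// block of output dimensions depends only on its own subset of the input
// dimensions.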
void FeatureTransformEstimateMulti::Estimate(
const FeatureTransformEstimateOptions &opts,
const std::vector<std::vector<int32> > &indexes,
Matrix<BaseFloat> *M) const {

int32 input_dim = Dim(), output_dim = 0, num_transforms = indexes.size();
for (int32 i = 0; i < num_transforms; i++) { // some input-checking.
KALDI_ASSERT(indexes[i].size() > 0);
std::vector<int32> this_indexes(indexes[i]);
std::sort(this_indexes.begin(), this_indexes.end());
KALDI_ASSERT(IsSortedAndUniq(this_indexes)); // check for duplicates.
KALDI_ASSERT(this_indexes.front() >= 0);
KALDI_ASSERT(this_indexes.back() < input_dim);
output_dim += this_indexes.size();
}

int32 input_dim_ext = (opts.remove_offset ? input_dim + 1 : input_dim);
M->Resize(output_dim, input_dim_ext);

double count;
Vector<double> total_mean;
SpMatrix<double> total_covar, between_covar;
GetStats(&total_covar, &between_covar, &total_mean, &count);

int32 cur_output_index = 0;
for (int32 i = 0; i < num_transforms; i++) {
Matrix<BaseFloat> M_tmp;
EstimateTransformPart(opts, indexes[i], total_covar, between_covar,
total_mean, &M_tmp);
int32 this_output_dim = indexes[i].size();
M->Range(cur_output_index, this_output_dim, 0, M->NumCols()).
CopyFromMat(M_tmp);
cur_output_index += this_output_dim;
}

}


} // End of namespace kaldi
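A minimal usage sketch for the new estimator follows. It assumes FeatureTransformEstimate inherits the Init()/Accumulate() stats interface from LdaEstimate (src/transform/lda-estimate.h), as the original transform/ version of this class does; the header added by this PR is not shown above, so those names, the function EstimateExample, and the option values are illustrative only, not part of this diff.

// Sketch only: Init()/Accumulate() are assumed to come from LdaEstimate
// (src/transform/lda-estimate.h); only Estimate() and the options struct
// appear in the code above.
#include <vector>
#include "nnet3/get-feature-transform.h"

namespace kaldi {

void EstimateExample(const std::vector<Vector<BaseFloat> > &feats,
                     const std::vector<int32> &labels,
                     int32 num_classes, int32 feat_dim) {
  FeatureTransformEstimate est;
  est.Init(num_classes, feat_dim);             // zero the per-class stats
  for (size_t i = 0; i < feats.size(); i++)
    est.Accumulate(feats[i], labels[i], 1.0);  // one frame, weight 1.0

  FeatureTransformEstimateOptions opts;
  opts.dim = 40;          // target dimension; <= 0 means keep feat_dim
  Matrix<BaseFloat> M;    // the estimated transform
  TpMatrix<BaseFloat> C;  // Cholesky factor of within-class covariance
  est.Estimate(opts, &M, &C);
  // If opts.remove_offset is set, M has feat_dim + 1 columns and expects
  // inputs with a trailing 1.0 appended.
}

}  // namespace kaldi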