kaldi-asr · danpovey · Aug 29, 2019 · Jun 20, 2019 · Jun 21, 2019 · Aug 28, 2019
diff --git a/src/feat/online-feature.cc b/src/feat/online-feature.cc
@@ -71,7 +71,13 @@ OnlineGenericBaseFeature<C>::OnlineGenericBaseFeature(
     const typename C::Options &opts):
     computer_(opts), window_function_(computer_.GetFrameOptions()),
     features_(opts.frame_opts.max_feature_vectors),
-    input_finished_(false), waveform_offset_(0) { }
+    input_finished_(false), waveform_offset_(0) {
+  // RE the following assert: search for ONLINE_IVECTOR_LIMIT in
+  // online-ivector-feature.cc.
+  // Casting to uint32, an unsigned type, means that -1 would be treated
+  // as `very large`.
+  KALDI_ASSERT(static_cast<uint32>(opts.frame_opts.max_feature_vectors) > 200);
+}
 
 
 template <class C>

diff --git a/src/ivector/ivector-extractor.h b/src/ivector/ivector-extractor.h
@@ -468,7 +468,7 @@ struct IvectorExtractorEstimationOptions {
                    "update any associated parameters.");
     opts->Register("diagonalize", &diagonalize,
                    "If true, diagonalize the quadratic term in the "
-                   "objective function. This reorders the ivector dimensions"
+                   "objective function. This reorders the ivector dimensions "
                    "from most to least important.");
   }
 };

diff --git a/src/online2/online-ivector-feature.cc b/src/online2/online-ivector-feature.cc
@@ -527,58 +527,6 @@ template
 void OnlineSilenceWeighting::ComputeCurrentTraceback<fst::GrammarFst>(
     const LatticeFasterOnlineDecoderTpl<fst::GrammarFst> &decoder);
 
-int32 OnlineSilenceWeighting::GetBeginFrame() {
-  int32 max_duration = config_.max_state_duration;
-  if (max_duration <= 0 || num_frames_output_and_correct_ == 0)
-    return num_frames_output_and_correct_;
-
-  // t_last_untouched is the index of the last frame that is not newly touched
-  // by ComputeCurrentTraceback.  We are interested in whether it is part of a
-  // run of length greater than max_duration, since this would force it
-  // to be treated as silence (note: typically a non-silence phone that's very
-  // long is really silence, for example this can happen with the word "mm").
-
-  int32 t_last_untouched = num_frames_output_and_correct_ - 1,
-      t_end = frame_info_.size();
-  int32 transition_id = frame_info_[t_last_untouched].transition_id;
-  // no point searching longer than max_duration; when the length of the run is
-  // at least that much, a longer length makes no difference.
-  int32 lower_search_bound = std::max(0, t_last_untouched - max_duration),
-      upper_search_bound = std::min(t_last_untouched + max_duration, t_end - 1),
-      t_lower, t_upper;
-
-  // t_lower will be the first index in the run of equal transition-ids.
-  for (t_lower = t_last_untouched;
-       t_lower > lower_search_bound &&
-           frame_info_[t_lower - 1].transition_id == transition_id; t_lower--);
-
-  // t_lower will be the last index in the run of equal transition-ids.
-  for (t_upper = t_last_untouched;
-       t_upper < upper_search_bound &&
-           frame_info_[t_upper + 1].transition_id == transition_id; t_upper++);
-
-  int32 run_length = t_upper - t_lower + 1;
-  if (run_length <= max_duration) {
-    // we wouldn't treat this run as being silence, as it's within
-    // the duration limit.  So we return the default value
-    // num_frames_output_and_correct_ as our lower bound for processing.
-    return num_frames_output_and_correct_;
-  }
-  int32 old_run_length = t_last_untouched - t_lower + 1;
-  if (old_run_length > max_duration) {
-    // The run-length before we got this new data was already longer than the
-    // max-duration, so would already have been treated as silence.  therefore
-    // we don't have to encompass it all- we just include a long enough length
-    // in the region we are going to process, that the run-length in that region
-    // is longer than max_duration.
-    int32 ans = t_upper - max_duration;
-    KALDI_ASSERT(ans >= t_lower);
-    return ans;
-  } else {
-    return t_lower;
-  }
-}
-
 void OnlineSilenceWeighting::GetDeltaWeights(
     int32 num_frames_ready, int32 first_decoder_frame,
     std::vector<std::pair<int32, BaseFloat> > *delta_weights) {
@@ -594,13 +542,20 @@ void OnlineSilenceWeighting::GetDeltaWeights(
 
   delta_weights->clear();
 
+  int32 prev_num_frames_processed = frame_info_.size();
   if (frame_info_.size() < static_cast<size_t>(num_decoder_frames_ready))
     frame_info_.resize(num_decoder_frames_ready);
 
-  // we may have to make begin_frame earlier than num_frames_output_and_correct_
-  // so that max_state_duration is properly enforced.   GetBeginFrame() handles
-  // this logic.
-  int32 begin_frame = GetBeginFrame(),
+  // Don't go further backward into the past then 100 frames before the most
+  // recent frame previously than 100 frames when modifying the traceback.
+  // C.f. the value 200 in template
+  // OnlineGenericBaseFeature<C>::OnlineGenericBaseFeature in online-feature.cc,
+  // which needs to be more than this value of 100 plus the amount of context
+  // that LDA might use plus the chunk size we're likely to decode in one time.
+  // The user can always increase the value of --max-feature-vectors in case one
+  // of these conditions is broken.  Search for ONLINE_IVECTOR_LIMIT in
+  // online-feature.cc
+  int32 begin_frame = std::max<int32>(0, prev_num_frames_processed - 100),
       frames_out = static_cast<int32>(frame_info_.size()) - begin_frame;
   // frames_out is the number of frames we will output.
   KALDI_ASSERT(frames_out >= 0);

diff --git a/src/online2/online-ivector-feature.h b/src/online2/online-ivector-feature.h
@@ -125,7 +125,7 @@ struct OnlineIvectorExtractionConfig {
                    "add online-cmvn to feature pipeline of ivector extractor, "
                    "use the cmvn setup from the UBM.  Note: the default of "
                    "false is what we historically used; we'd use true if "
-                   "we were using CMVN'ed features for the neural net.")
+                   "we were using CMVN'ed features for the neural net.");
     opts->Register("splice-config", &splice_config_rxfilename, "Configuration file "
                    "for frame splicing (--left-context and --right-context "
                    "options); used for iVector extraction.");
@@ -470,7 +470,7 @@ class OnlineSilenceWeighting {
 
   OnlineSilenceWeighting(const TransitionModel &trans_model,
                          const OnlineSilenceWeightingConfig &config,
-			 int32 frame_subsampling_factor = 1);
+                         int32 frame_subsampling_factor = 1);
 
   bool Active() const { return config_.Active(); }
 
@@ -530,12 +530,6 @@ class OnlineSilenceWeighting {
     FrameInfo(): token(NULL), transition_id(-1), current_weight(0.0) {}
   };
 
-  // gets the frame at which we need to begin our processing in
-  // GetDeltaWeights...  normally this is equal to
-  // num_frames_output_and_correct_, but it may be earlier in case
-  // max_state_duration is relevant.
-  int32 GetBeginFrame();
-
   // This contains information about any previously computed traceback;
   // when the traceback changes we use this variable to compare it with the
   // previous traceback.