diff --git a/src/feat/online-feature.cc b/src/feat/online-feature.cc index b7f5b3ebc60..6f5ce6ee95b 100644 --- a/src/feat/online-feature.cc +++ b/src/feat/online-feature.cc @@ -71,7 +71,13 @@ OnlineGenericBaseFeature::OnlineGenericBaseFeature( const typename C::Options &opts): computer_(opts), window_function_(computer_.GetFrameOptions()), features_(opts.frame_opts.max_feature_vectors), - input_finished_(false), waveform_offset_(0) { } + input_finished_(false), waveform_offset_(0) { + // RE the following assert: search for ONLINE_IVECTOR_LIMIT in + // online-ivector-feature.cc. + // Casting to uint32, an unsigned type, means that -1 would be treated + // as `very large`. + KALDI_ASSERT(static_cast(opts.frame_opts.max_feature_vectors) > 200); +} template diff --git a/src/ivector/ivector-extractor.h b/src/ivector/ivector-extractor.h index 3b9b6f3eb5c..938034859e2 100644 --- a/src/ivector/ivector-extractor.h +++ b/src/ivector/ivector-extractor.h @@ -468,7 +468,7 @@ struct IvectorExtractorEstimationOptions { "update any associated parameters."); opts->Register("diagonalize", &diagonalize, "If true, diagonalize the quadratic term in the " - "objective function. This reorders the ivector dimensions" + "objective function. This reorders the ivector dimensions " "from most to least important."); } }; diff --git a/src/online2/online-ivector-feature.cc b/src/online2/online-ivector-feature.cc index e78e989c396..21663d70a02 100644 --- a/src/online2/online-ivector-feature.cc +++ b/src/online2/online-ivector-feature.cc @@ -527,58 +527,6 @@ template void OnlineSilenceWeighting::ComputeCurrentTraceback( const LatticeFasterOnlineDecoderTpl &decoder); -int32 OnlineSilenceWeighting::GetBeginFrame() { - int32 max_duration = config_.max_state_duration; - if (max_duration <= 0 || num_frames_output_and_correct_ == 0) - return num_frames_output_and_correct_; - - // t_last_untouched is the index of the last frame that is not newly touched - // by ComputeCurrentTraceback. 
We are interested in whether it is part of a - // run of length greater than max_duration, since this would force it - // to be treated as silence (note: typically a non-silence phone that's very - // long is really silence, for example this can happen with the word "mm"). - - int32 t_last_untouched = num_frames_output_and_correct_ - 1, - t_end = frame_info_.size(); - int32 transition_id = frame_info_[t_last_untouched].transition_id; - // no point searching longer than max_duration; when the length of the run is - // at least that much, a longer length makes no difference. - int32 lower_search_bound = std::max(0, t_last_untouched - max_duration), - upper_search_bound = std::min(t_last_untouched + max_duration, t_end - 1), - t_lower, t_upper; - - // t_lower will be the first index in the run of equal transition-ids. - for (t_lower = t_last_untouched; - t_lower > lower_search_bound && - frame_info_[t_lower - 1].transition_id == transition_id; t_lower--); - - // t_lower will be the last index in the run of equal transition-ids. - for (t_upper = t_last_untouched; - t_upper < upper_search_bound && - frame_info_[t_upper + 1].transition_id == transition_id; t_upper++); - - int32 run_length = t_upper - t_lower + 1; - if (run_length <= max_duration) { - // we wouldn't treat this run as being silence, as it's within - // the duration limit. So we return the default value - // num_frames_output_and_correct_ as our lower bound for processing. - return num_frames_output_and_correct_; - } - int32 old_run_length = t_last_untouched - t_lower + 1; - if (old_run_length > max_duration) { - // The run-length before we got this new data was already longer than the - // max-duration, so would already have been treated as silence. therefore - // we don't have to encompass it all- we just include a long enough length - // in the region we are going to process, that the run-length in that region - // is longer than max_duration. 
- int32 ans = t_upper - max_duration; - KALDI_ASSERT(ans >= t_lower); - return ans; - } else { - return t_lower; - } -} - void OnlineSilenceWeighting::GetDeltaWeights( int32 num_frames_ready, int32 first_decoder_frame, std::vector > *delta_weights) { @@ -594,13 +542,20 @@ void OnlineSilenceWeighting::GetDeltaWeights( delta_weights->clear(); + int32 prev_num_frames_processed = frame_info_.size(); if (frame_info_.size() < static_cast(num_decoder_frames_ready)) frame_info_.resize(num_decoder_frames_ready); - // we may have to make begin_frame earlier than num_frames_output_and_correct_ - // so that max_state_duration is properly enforced. GetBeginFrame() handles - // this logic. - int32 begin_frame = GetBeginFrame(), + // Don't go further backward into the past than 100 frames before the most + // recent previously processed frame when modifying the traceback. + // Cf. the value 200 in template + // OnlineGenericBaseFeature::OnlineGenericBaseFeature in online-feature.cc, + // which needs to be more than this value of 100 plus the amount of context + // that LDA might use plus the chunk size we're likely to decode at one time. + // The user can always increase the value of --max-feature-vectors in case one + // of these conditions is broken. Search for ONLINE_IVECTOR_LIMIT in + // online-feature.cc. + int32 begin_frame = std::max(0, prev_num_frames_processed - 100), frames_out = static_cast(frame_info_.size()) - begin_frame; // frames_out is the number of frames we will output. KALDI_ASSERT(frames_out >= 0); diff --git a/src/online2/online-ivector-feature.h b/src/online2/online-ivector-feature.h index 1efe907e8cc..511d17a4b52 100644 --- a/src/online2/online-ivector-feature.h +++ b/src/online2/online-ivector-feature.h @@ -125,7 +125,7 @@ struct OnlineIvectorExtractionConfig { "add online-cmvn to feature pipeline of ivector extractor, " "use the cmvn setup from the UBM.
Note: the default of " "false is what we historically used; we'd use true if " - "we were using CMVN'ed features for the neural net.") + "we were using CMVN'ed features for the neural net."); opts->Register("splice-config", &splice_config_rxfilename, "Configuration file " "for frame splicing (--left-context and --right-context " "options); used for iVector extraction."); @@ -470,7 +470,7 @@ class OnlineSilenceWeighting { OnlineSilenceWeighting(const TransitionModel &trans_model, const OnlineSilenceWeightingConfig &config, - int32 frame_subsampling_factor = 1); + int32 frame_subsampling_factor = 1); bool Active() const { return config_.Active(); } @@ -530,12 +530,6 @@ class OnlineSilenceWeighting { FrameInfo(): token(NULL), transition_id(-1), current_weight(0.0) {} }; - // gets the frame at which we need to begin our processing in - // GetDeltaWeights... normally this is equal to - // num_frames_output_and_correct_, but it may be earlier in case - // max_state_duration is relevant. - int32 GetBeginFrame(); - // This contains information about any previously computed traceback; // when the traceback changes we use this variable to compare it with the // previous traceback.