Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion src/feat/online-feature.cc
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,13 @@ OnlineGenericBaseFeature<C>::OnlineGenericBaseFeature(
const typename C::Options &opts):
computer_(opts), window_function_(computer_.GetFrameOptions()),
features_(opts.frame_opts.max_feature_vectors),
input_finished_(false), waveform_offset_(0) { }
input_finished_(false), waveform_offset_(0) {
// RE the following assert: search for ONLINE_IVECTOR_LIMIT in
// online-ivector-feature.cc.
// Casting to uint32, an unsigned type, means that -1 would be treated
// as `very large`.
KALDI_ASSERT(static_cast<uint32>(opts.frame_opts.max_feature_vectors) > 200);
}


template <class C>
Expand Down
2 changes: 1 addition & 1 deletion src/ivector/ivector-extractor.h
Original file line number Diff line number Diff line change
Expand Up @@ -468,7 +468,7 @@ struct IvectorExtractorEstimationOptions {
"update any associated parameters.");
opts->Register("diagonalize", &diagonalize,
"If true, diagonalize the quadratic term in the "
"objective function. This reorders the ivector dimensions"
"objective function. This reorders the ivector dimensions "
"from most to least important.");
}
};
Expand Down
67 changes: 11 additions & 56 deletions src/online2/online-ivector-feature.cc
Original file line number Diff line number Diff line change
Expand Up @@ -527,58 +527,6 @@ template
void OnlineSilenceWeighting::ComputeCurrentTraceback<fst::GrammarFst>(
const LatticeFasterOnlineDecoderTpl<fst::GrammarFst> &decoder);

// Returns the index of the first frame that GetDeltaWeights needs to process.
// This is normally num_frames_output_and_correct_, but it is moved earlier
// when the last untouched frame belongs to a run of identical transition-ids
// that is longer than config_.max_state_duration, because such a run is forced
// to be treated as silence (typically a very long non-silence phone is really
// silence; this can happen, for example, with the word "mm").
int32 OnlineSilenceWeighting::GetBeginFrame() {
  const int32 duration_limit = config_.max_state_duration;
  const int32 default_begin = num_frames_output_and_correct_;
  // With the duration limit disabled, or with nothing output yet, there is
  // no earlier run to look at.
  if (duration_limit <= 0 || default_begin == 0)
    return default_begin;

  // 'anchor' is the last frame that is not newly touched by
  // ComputeCurrentTraceback; we examine the run of equal transition-ids
  // that contains it.
  const int32 anchor = default_begin - 1;
  const int32 num_frames = frame_info_.size();
  const int32 anchor_tid = frame_info_[anchor].transition_id;

  // There is no point searching further than the duration limit in either
  // direction: once the run is at least that long, extra length makes no
  // difference.
  const int32 min_index = std::max(0, anchor - duration_limit);
  const int32 max_index = std::min(anchor + duration_limit, num_frames - 1);

  // run_first becomes the first index of the run (within the search bound).
  int32 run_first = anchor;
  while (run_first > min_index &&
         frame_info_[run_first - 1].transition_id == anchor_tid)
    --run_first;

  // run_last becomes the last index of the run (within the search bound).
  int32 run_last = anchor;
  while (run_last < max_index &&
         frame_info_[run_last + 1].transition_id == anchor_tid)
    ++run_last;

  // The run is within the duration limit, so it would not be treated as
  // silence; the default lower bound for processing applies.
  if (run_last - run_first + 1 <= duration_limit)
    return default_begin;

  // The part of the run that existed before the new data arrived was still
  // within the limit, so the whole run has to be processed from its start.
  if (anchor - run_first + 1 <= duration_limit)
    return run_first;

  // The run was already longer than the limit before the new data arrived, so
  // it would already have been treated as silence.  We don't need to cover all
  // of it; we only include enough frames that the run length inside the
  // processed region still exceeds the limit.
  const int32 begin = run_last - duration_limit;
  KALDI_ASSERT(begin >= run_first);
  return begin;
}

void OnlineSilenceWeighting::GetDeltaWeights(
int32 num_frames_ready, int32 first_decoder_frame,
std::vector<std::pair<int32, BaseFloat> > *delta_weights) {
Expand All @@ -594,13 +542,20 @@ void OnlineSilenceWeighting::GetDeltaWeights(

delta_weights->clear();

int32 prev_num_frames_processed = frame_info_.size();
if (frame_info_.size() < static_cast<size_t>(num_decoder_frames_ready))
frame_info_.resize(num_decoder_frames_ready);

// we may have to make begin_frame earlier than num_frames_output_and_correct_
// so that max_state_duration is properly enforced. GetBeginFrame() handles
// this logic.
int32 begin_frame = GetBeginFrame(),
// When modifying the traceback, don't go further back into the past than
// 100 frames before the most recent frame that was previously processed.
// C.f. the value 200 in template
// OnlineGenericBaseFeature<C>::OnlineGenericBaseFeature in online-feature.cc,
// which needs to be more than this value of 100 plus the amount of context
// that LDA might use plus the chunk size we're likely to decode in one time.
// The user can always increase the value of --max-feature-vectors in case one
// of these conditions is broken. Search for ONLINE_IVECTOR_LIMIT in
// online-feature.cc
int32 begin_frame = std::max<int32>(0, prev_num_frames_processed - 100),
frames_out = static_cast<int32>(frame_info_.size()) - begin_frame;
// frames_out is the number of frames we will output.
KALDI_ASSERT(frames_out >= 0);
Expand Down
10 changes: 2 additions & 8 deletions src/online2/online-ivector-feature.h
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ struct OnlineIvectorExtractionConfig {
"add online-cmvn to feature pipeline of ivector extractor, "
"use the cmvn setup from the UBM. Note: the default of "
"false is what we historically used; we'd use true if "
"we were using CMVN'ed features for the neural net.")
"we were using CMVN'ed features for the neural net.");
opts->Register("splice-config", &splice_config_rxfilename, "Configuration file "
"for frame splicing (--left-context and --right-context "
"options); used for iVector extraction.");
Expand Down Expand Up @@ -470,7 +470,7 @@ class OnlineSilenceWeighting {

OnlineSilenceWeighting(const TransitionModel &trans_model,
const OnlineSilenceWeightingConfig &config,
int32 frame_subsampling_factor = 1);
int32 frame_subsampling_factor = 1);

bool Active() const { return config_.Active(); }

Expand Down Expand Up @@ -530,12 +530,6 @@ class OnlineSilenceWeighting {
FrameInfo(): token(NULL), transition_id(-1), current_weight(0.0) {}
};

// gets the frame at which we need to begin our processing in
// GetDeltaWeights... normally this is equal to
// num_frames_output_and_correct_, but it may be earlier in case
// max_state_duration is relevant.
int32 GetBeginFrame();

// This contains information about any previously computed traceback;
// when the traceback changes we use this variable to compare it with the
// previous traceback.
Expand Down