diff --git a/src/lat/sausages.cc b/src/lat/sausages.cc
index 16a61b3f5eb..11737b7a299 100644
--- a/src/lat/sausages.cc
+++ b/src/lat/sausages.cc
@@ -2,6 +2,7 @@
 
 // Copyright 2012  Johns Hopkins University (Author: Daniel Povey)
 //           2015  Guoguo Chen
+//           2019  Dogan Can
 
 // See ../../COPYING for clarification regarding multiple authors
 //
@@ -52,7 +53,28 @@ void MinimumBayesRisk::MbrDecode() {
       }
       // build the outputs (time, confidences),
       if (R_[q] != 0 || opts_.print_silence) {
-        one_best_times_.push_back(times_[q]);
+        one_best_times_.push_back(times_[q][0]);
+        size_t i = one_best_times_.size();
+        if (i > 1 && one_best_times_[i-2].second > one_best_times_[i-1].first) {
+          // It's quite possible for this to happen, but it seems like it would
+          // have a bad effect on the downstream processing, so we fix it here.
+          // We resolve overlaps by redistributing the available time interval.
+          BaseFloat prev_right = i > 2 ? one_best_times_[i-3].second : 0.0;
+          BaseFloat left = std::max(prev_right,
+                                    std::min(one_best_times_[i-2].first,
+                                             one_best_times_[i-1].first));
+          BaseFloat right = std::max(one_best_times_[i-2].second,
+                                     one_best_times_[i-1].second);
+          BaseFloat first_dur =
+              one_best_times_[i-2].second - one_best_times_[i-2].first;
+          BaseFloat second_dur =
+              one_best_times_[i-1].second - one_best_times_[i-1].first;
+          BaseFloat mid = left + (right - left) * first_dur /
+              (first_dur + second_dur);
+          one_best_times_[i-2].first = left;
+          one_best_times_[i-2].second = one_best_times_[i-1].first = mid;
+          one_best_times_[i-1].second = right;
+        }
         BaseFloat confidence = 0.0;
         for (int32 j = 0; j < gamma_[q].size(); j++)
           if (gamma_[q][j].first == R_[q]) confidence = gamma_[q][j].second;
@@ -146,11 +168,11 @@ void MinimumBayesRisk::AccStats() {
   std::vector<map<int32, double> > gamma(Q+1); // temp. form of gamma.
   // index 1...Q [word] -> occ.
 
-  // The tau arrays below are the sums over words of the tau_b
-  // and tau_e timing quantities mentioned in Appendix C of
-  // the paper... we are using these to get averaged times for
-  // the sausage bins, not specifically for the 1-best output.
-  Vector<double> tau_b(Q+1), tau_e(Q+1);
+  // The tau maps below are the sums over arcs with the same word label
+  // of the tau_b and tau_e timing quantities mentioned in Appendix C of
+  // the paper... we are using these to get averaged times for both
+  // the sausage bins and the 1-best output.
+  std::vector<map<int32, double> > tau_b(Q+1), tau_e(Q+1);
 
   double Ltmp = EditDistance(N, Q, alpha, alpha_dash, alpha_dash_arc);
   if (L_ != 0 && Ltmp > L_) { // L_ != 0 is to rule out 1st iter.
@@ -190,8 +212,8 @@ void MinimumBayesRisk::AccStats() {
           // next: gamma(q, w(a)) += beta_dash_arc(q)
           AddToMap(w_a, beta_dash_arc(q), &(gamma[q]));
           // next: accumulating times, see decl for tau_b,tau_e
-          tau_b(q) += state_times_[s_a] * beta_dash_arc(q);
-          tau_e(q) += state_times_[n] * beta_dash_arc(q);
+          AddToMap(w_a, state_times_[s_a] * beta_dash_arc(q), &(tau_b[q]));
+          AddToMap(w_a, state_times_[n] * beta_dash_arc(q), &(tau_e[q]));
           break;
         case 2:
           beta_dash(s_a, q) += beta_dash_arc(q);
@@ -204,8 +226,8 @@ void MinimumBayesRisk::AccStats() {
           // WARNING: there was an error in Appendix C. If we followed
           // the instructions there the next line would say state_times_[sa], but
           // it would be wrong. I will try to publish an erratum.
-          tau_b(q) += state_times_[n] * beta_dash_arc(q);
-          tau_e(q) += state_times_[n] * beta_dash_arc(q);
+          AddToMap(0, state_times_[n] * beta_dash_arc(q), &(tau_b[q]));
+          AddToMap(0, state_times_[n] * beta_dash_arc(q), &(tau_e[q]));
           break;
         default:
           KALDI_ERR << "Invalid b_arc value"; // error in code.
@@ -222,8 +244,8 @@ void MinimumBayesRisk::AccStats() {
     AddToMap(0, beta_dash_arc(q), &(gamma[q]));
     // the statements below are actually redundant because
    // state_times_[1] is zero.
-    tau_b(q) += state_times_[1] * beta_dash_arc(q);
-    tau_e(q) += state_times_[1] * beta_dash_arc(q);
+    AddToMap(0, state_times_[1] * beta_dash_arc(q), &(tau_b[q]));
+    AddToMap(0, state_times_[1] * beta_dash_arc(q), &(tau_e[q]));
   }
   for (int32 q = 1; q <= Q; q++) { // a check (line 35)
     double sum = 0.0;
@@ -240,7 +262,8 @@ void MinimumBayesRisk::AccStats() {
   for (int32 q = 1; q <= Q; q++) {
     for (map<int32, double>::iterator iter = gamma[q].begin();
          iter != gamma[q].end(); ++iter)
-      gamma_[q-1].push_back(std::make_pair(iter->first, static_cast<BaseFloat>(iter->second)));
+      gamma_[q-1].push_back(
+          std::make_pair(iter->first, static_cast<BaseFloat>(iter->second)));
     // sort gamma_[q-1] from largest to smallest posterior.
     GammaCompare comp;
     std::sort(gamma_[q-1].begin(), gamma_[q-1].end(), comp);
@@ -250,18 +273,32 @@ void MinimumBayesRisk::AccStats() {
   // indexing.
   times_.clear();
   times_.resize(Q);
+  sausage_times_.clear();
+  sausage_times_.resize(Q);
   for (int32 q = 1; q <= Q; q++) {
-    times_[q-1].first = tau_b(q);
-    times_[q-1].second = tau_e(q);
-    if (times_[q-1].first > times_[q-1].second) // this is quite bad.
-      KALDI_WARN << "Times out of order";
-    if (q > 1 && times_[q-2].second > times_[q-1].first) {
+    double t_b = 0.0, t_e = 0.0;
+    for (std::vector<std::pair<int32, BaseFloat>>::iterator iter = gamma_[q-1].begin();
+         iter != gamma_[q-1].end(); ++iter) {
+      double w_b = tau_b[q][iter->first], w_e = tau_e[q][iter->first];
+      if (w_b > w_e)
+        KALDI_WARN << "Times out of order"; // this is quite bad.
+      times_[q-1].push_back(
+          std::make_pair(static_cast<BaseFloat>(w_b / iter->second),
+                         static_cast<BaseFloat>(w_e / iter->second)));
+      t_b += w_b;
+      t_e += w_e;
+    }
+    sausage_times_[q-1].first = t_b;
+    sausage_times_[q-1].second = t_e;
+    if (sausage_times_[q-1].first > sausage_times_[q-1].second)
+      KALDI_WARN << "Times out of order"; // this is quite bad.
+    if (q > 1 && sausage_times_[q-2].second > sausage_times_[q-1].first) {
       // We previously had a warning here, but now we'll just set both
       // those values to their average. It's quite possible for this
       // condition to happen, but it seems like it would have a bad effect
       // on the downstream processing, so we fix it.
-      double avg = 0.5 * (times_[q-2].second + times_[q-1].first);
-      times_[q-2].second = times_[q-1].first = avg;
+      sausage_times_[q-2].second = sausage_times_[q-1].first =
+          0.5 * (sausage_times_[q-2].second + sausage_times_[q-1].first);
     }
   }
 }
@@ -371,7 +408,7 @@ MinimumBayesRisk::MinimumBayesRisk(const CompactLattice &clat_in,
   PrepareLatticeAndInitStats(&clat);
 
   R_ = words;
-  times_ = times;
+  sausage_times_ = times;
   L_ = 0.0;
 
   MbrDecode();
diff --git a/src/lat/sausages.h b/src/lat/sausages.h
index f613097b190..13f359c60d9 100644
--- a/src/lat/sausages.h
+++ b/src/lat/sausages.h
@@ -2,6 +2,7 @@
 
 // Copyright 2012  Johns Hopkins University (Author: Daniel Povey)
 //           2015  Guoguo Chen
+//           2019  Dogan Can
 
 // See ../../COPYING for clarification regarding multiple authors
 //
@@ -104,17 +105,27 @@ class MinimumBayesRisk {
     return R_;
   }
 
+  const std::vector<std::vector<std::pair<BaseFloat, BaseFloat> > > GetTimes() const {
+    return times_; // returns average (start,end) times for each word in each
+    // bin. These are raw averages without any processing, i.e. time intervals
+    // from different bins can overlap.
+  }
+
   const std::vector<std::pair<BaseFloat, BaseFloat> > GetSausageTimes() const {
-    return times_; // returns average (start,end) times for each bin (each entry
-    // of GetSausageStats()). Note: if you want the times for the one best,
-    // you can work out the one best yourself from the sausage stats and get the times
-    // at the same time.
+    return sausage_times_; // returns average (start,end) times for each bin.
+    // This is typically the weighted average of the times in GetTimes() but can
+    // be slightly different if the times for the bins overlap, in which case
+    // the times returned by this method do not overlap unlike the times
+    // returned by GetTimes().
   }
 
   const std::vector<std::pair<BaseFloat, BaseFloat> > &GetOneBestTimes() const {
-    return one_best_times_; // returns average (start,end) times for each bin corresponding
-    // to an entry in the one-best output. This is just the appropriate
-    // subsequence of the times in SausageTimes().
+    return one_best_times_; // returns average (start,end) times for each word
+    // corresponding to an entry in the one-best output. This is typically the
+    // appropriate subset of the times in GetTimes() but can be slightly
+    // different if the times for the one-best words overlap, in which case
+    // the times returned by this method do not overlap unlike the times
+    // returned by GetTimes().
   }
 
   /// Outputs the confidences for the one-best transcript.
@@ -122,8 +133,7 @@ class MinimumBayesRisk {
     return one_best_confidences_;
  }
 
-  /// Returns the expected WER over this sentence (assuming
-  /// model correctness.
+  /// Returns the expected WER over this sentence (assuming model correctness).
   BaseFloat GetBayesRisk() const { return L_; }
 
   const std::vector<std::vector<std::pair<int32, BaseFloat> > > &GetSausageStats() const {
@@ -222,15 +232,20 @@ class MinimumBayesRisk {
   // paper. We sort in reverse order on the second member (posterior), so more
   // likely word is first.
 
-  std::vector<std::pair<BaseFloat, BaseFloat> > times_;
+  std::vector<std::vector<std::pair<BaseFloat, BaseFloat> > > times_;
+  // The average start and end times for words in each confusion-network bin.
+  // This is like an average over arcs, of the tau_b and tau_e quantities in
+  // Appendix C of the paper. Indexed from zero, like gamma_ and R_.
+
+  std::vector<std::pair<BaseFloat, BaseFloat> > sausage_times_;
   // The average start and end times for each confusion-network bin. This
   // is like an average over words, of the tau_b and tau_e quantities in
   // Appendix C of the paper. Indexed from zero, like gamma_ and R_.
 
   std::vector<std::pair<BaseFloat, BaseFloat> > one_best_times_;
-  // one_best_times_ is a subsequence of times_, corresponding to
-  // (start,end) times of words in the one best output. Actually these
-  // times are averages over the bin that each word came from.
+  // The average start and end times for words in the one best output. This
+  // is like an average over the arcs, of the tau_b and tau_e quantities in
+  // Appendix C of the paper. Indexed from zero, like gamma_ and R_.
 
   std::vector<BaseFloat> one_best_confidences_;
   // vector of confidences for the 1-best output (which could be
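For review convenience, here is a minimal standalone sketch of the proportional overlap-resolution arithmetic this patch adds to MinimumBayesRisk::MbrDecode(). It is illustrative only: FixLastOverlap and the main() driver are hypothetical names, not part of the patch or of the Kaldi API, and like the patched code it assumes the two overlapping intervals do not both have zero duration.

// overlap_fix_sketch.cc -- illustrative sketch, not part of the patch.
#include <algorithm>
#include <cstdio>
#include <utility>
#include <vector>

typedef float BaseFloat;  // stand-in for Kaldi's BaseFloat.

// Redistributes the union of the last two (start,end) intervals in *times
// proportionally to their original durations, mirroring the overlap fix
// added to MinimumBayesRisk::MbrDecode().  Assumes the two durations are
// not both zero when the intervals overlap (as the patched code does).
void FixLastOverlap(std::vector<std::pair<BaseFloat, BaseFloat> > *times) {
  size_t i = times->size();
  if (i < 2 || (*times)[i-2].second <= (*times)[i-1].first)
    return;  // fewer than two entries, or no overlap: nothing to do.
  // Never move the left edge before the end of the word before these two.
  BaseFloat prev_right = i > 2 ? (*times)[i-3].second : 0.0f;
  BaseFloat left = std::max(prev_right,
                            std::min((*times)[i-2].first,
                                     (*times)[i-1].first));
  BaseFloat right = std::max((*times)[i-2].second, (*times)[i-1].second);
  BaseFloat first_dur = (*times)[i-2].second - (*times)[i-2].first;
  BaseFloat second_dur = (*times)[i-1].second - (*times)[i-1].first;
  // Split [left, right] at the point that preserves the duration ratio.
  BaseFloat mid = left + (right - left) * first_dur / (first_dur + second_dur);
  (*times)[i-2].first = left;
  (*times)[i-2].second = (*times)[i-1].first = mid;
  (*times)[i-1].second = right;
}

int main() {
  std::vector<std::pair<BaseFloat, BaseFloat> > times;
  times.push_back(std::make_pair(1.0f, 2.0f));  // word 1: duration 1.0
  times.push_back(std::make_pair(1.5f, 3.0f));  // word 2: overlaps word 1
  FixLastOverlap(&times);
  // The union [1.0, 3.0] is split at 1.0 + 2.0 * (1.0 / 2.5) = 1.8,
  // giving the non-overlapping intervals [1.0, 1.8] and [1.8, 3.0].
  for (size_t i = 0; i < times.size(); i++)
    std::printf("[%.2f, %.2f]\n", times[i].first, times[i].second);
  return 0;
}

The split point preserves the ratio of the two original durations, and clamping left to the previous word's end time ensures the adjustment never reintroduces an overlap with the word before.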