Merged
79 changes: 58 additions & 21 deletions src/lat/sausages.cc
@@ -2,6 +2,7 @@

// Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
// 2015 Guoguo Chen
// 2019 Dogan Can

// See ../../COPYING for clarification regarding multiple authors
//
@@ -52,7 +53,28 @@ void MinimumBayesRisk::MbrDecode() {
}
// build the outputs (time, confidences),
if (R_[q] != 0 || opts_.print_silence) {
one_best_times_.push_back(times_[q]);
one_best_times_.push_back(times_[q][0]);
size_t i = one_best_times_.size();
if (i > 1 && one_best_times_[i-2].second > one_best_times_[i-1].first) {
// It's quite possible for this to happen, but it seems like it would
// have a bad effect on the downstream processing, so we fix it here.
// We resolve overlaps by redistributing the available time interval.
BaseFloat prev_right = i > 2 ? one_best_times_[i-3].second : 0.0;
BaseFloat left = std::max(prev_right,
std::min(one_best_times_[i-2].first,
one_best_times_[i-1].first));
BaseFloat right = std::max(one_best_times_[i-2].second,
one_best_times_[i-1].second);
BaseFloat first_dur =
one_best_times_[i-2].second - one_best_times_[i-2].first;
BaseFloat second_dur =
one_best_times_[i-1].second - one_best_times_[i-1].first;
BaseFloat mid = left + (right - left) * first_dur /
(first_dur + second_dur);
one_best_times_[i-2].first = left;
one_best_times_[i-2].second = one_best_times_[i-1].first = mid;
one_best_times_[i-1].second = right;
}
BaseFloat confidence = 0.0;
for (int32 j = 0; j < gamma_[q].size(); j++)
if (gamma_[q][j].first == R_[q]) confidence = gamma_[q][j].second;
@@ -146,11 +168,11 @@ void MinimumBayesRisk::AccStats() {
std::vector<map<int32, double> > gamma(Q+1); // temp. form of gamma.
// index 1...Q [word] -> occ.

// The tau arrays below are the sums over words of the tau_b
// and tau_e timing quantities mentioned in Appendix C of
// the paper... we are using these to get averaged times for
// the sausage bins, not specifically for the 1-best output.
Vector<double> tau_b(Q+1), tau_e(Q+1);
// The tau maps below are the sums over arcs with the same word label
// of the tau_b and tau_e timing quantities mentioned in Appendix C of
// the paper... we are using these to get averaged times for both the
// sausage bins and the 1-best output.
std::vector<map<int32, double> > tau_b(Q+1), tau_e(Q+1);

double Ltmp = EditDistance(N, Q, alpha, alpha_dash, alpha_dash_arc);
if (L_ != 0 && Ltmp > L_) { // L_ != 0 is to rule out 1st iter.
@@ -190,8 +212,8 @@ void MinimumBayesRisk::AccStats() {
// next: gamma(q, w(a)) += beta_dash_arc(q)
AddToMap(w_a, beta_dash_arc(q), &(gamma[q]));
// next: accumulating times, see decl for tau_b,tau_e
tau_b(q) += state_times_[s_a] * beta_dash_arc(q);
tau_e(q) += state_times_[n] * beta_dash_arc(q);
AddToMap(w_a, state_times_[s_a] * beta_dash_arc(q), &(tau_b[q]));
AddToMap(w_a, state_times_[n] * beta_dash_arc(q), &(tau_e[q]));
break;
case 2:
beta_dash(s_a, q) += beta_dash_arc(q);
@@ -204,8 +226,8 @@ void MinimumBayesRisk::AccStats() {
// WARNING: there was an error in Appendix C. If we followed
// the instructions there the next line would say state_times_[sa], but
// it would be wrong. I will try to publish an erratum.
tau_b(q) += state_times_[n] * beta_dash_arc(q);
tau_e(q) += state_times_[n] * beta_dash_arc(q);
AddToMap(0, state_times_[n] * beta_dash_arc(q), &(tau_b[q]));
AddToMap(0, state_times_[n] * beta_dash_arc(q), &(tau_e[q]));
break;
default:
KALDI_ERR << "Invalid b_arc value"; // error in code.
@@ -222,8 +244,8 @@ void MinimumBayesRisk::AccStats() {
AddToMap(0, beta_dash_arc(q), &(gamma[q]));
// the statements below are actually redundant because
// state_times_[1] is zero.
tau_b(q) += state_times_[1] * beta_dash_arc(q);
tau_e(q) += state_times_[1] * beta_dash_arc(q);
AddToMap(0, state_times_[1] * beta_dash_arc(q), &(tau_b[q]));
AddToMap(0, state_times_[1] * beta_dash_arc(q), &(tau_e[q]));
}
for (int32 q = 1; q <= Q; q++) { // a check (line 35)
double sum = 0.0;
@@ -240,7 +262,8 @@ void MinimumBayesRisk::AccStats() {
for (int32 q = 1; q <= Q; q++) {
for (map<int32, double>::iterator iter = gamma[q].begin();
iter != gamma[q].end(); ++iter)
gamma_[q-1].push_back(std::make_pair(iter->first, static_cast<BaseFloat>(iter->second)));
gamma_[q-1].push_back(
std::make_pair(iter->first, static_cast<BaseFloat>(iter->second)));
// sort gamma_[q-1] from largest to smallest posterior.
GammaCompare comp;
std::sort(gamma_[q-1].begin(), gamma_[q-1].end(), comp);
@@ -250,18 +273,32 @@ void MinimumBayesRisk::AccStats() {
// indexing.
times_.clear();
times_.resize(Q);
sausage_times_.clear();
sausage_times_.resize(Q);
for (int32 q = 1; q <= Q; q++) {
times_[q-1].first = tau_b(q);
times_[q-1].second = tau_e(q);
if (times_[q-1].first > times_[q-1].second) // this is quite bad.
KALDI_WARN << "Times out of order";
if (q > 1 && times_[q-2].second > times_[q-1].first) {
double t_b = 0.0, t_e = 0.0;
for (std::vector<std::pair<int32, BaseFloat>>::iterator iter = gamma_[q-1].begin();
iter != gamma_[q-1].end(); ++iter) {
double w_b = tau_b[q][iter->first], w_e = tau_e[q][iter->first];
if (w_b > w_e)
KALDI_WARN << "Times out of order"; // this is quite bad.
times_[q-1].push_back(
std::make_pair(static_cast<BaseFloat>(w_b / iter->second),
static_cast<BaseFloat>(w_e / iter->second)));
t_b += w_b;
t_e += w_e;
}
sausage_times_[q-1].first = t_b;
sausage_times_[q-1].second = t_e;
if (sausage_times_[q-1].first > sausage_times_[q-1].second)
KALDI_WARN << "Times out of order"; // this is quite bad.
if (q > 1 && sausage_times_[q-2].second > sausage_times_[q-1].first) {
// We previously had a warning here, but now we'll just set both
// those values to their average. It's quite possible for this
// condition to happen, but it seems like it would have a bad effect
// on the downstream processing, so we fix it.
double avg = 0.5 * (times_[q-2].second + times_[q-1].first);
times_[q-2].second = times_[q-1].first = avg;
sausage_times_[q-2].second = sausage_times_[q-1].first =
0.5 * (sausage_times_[q-2].second + sausage_times_[q-1].first);
}
}
}
@@ -371,7 +408,7 @@ MinimumBayesRisk::MinimumBayesRisk(const CompactLattice &clat_in,
PrepareLatticeAndInitStats(&clat);

R_ = words;
times_ = times;
sausage_times_ = times;
L_ = 0.0;

MbrDecode();
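The overlap fix above splits the union of the two overlapping word intervals at a point proportional to their original durations, clamped so the first word never starts before the word preceding it ends. A minimal standalone sketch of that arithmetic (hypothetical names, and float in place of Kaldi's BaseFloat, so it compiles on its own):

// Standalone sketch of the interval-redistribution arithmetic in
// MbrDecode() above; names are hypothetical.
#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <utility>
#include <vector>

// Resolve an overlap between the last two (start, end) intervals by
// splitting the available span [left, right] in proportion to the
// original durations.
void FixOverlap(std::vector<std::pair<float, float> > *times) {
  size_t i = times->size();
  if (i < 2 || (*times)[i-2].second <= (*times)[i-1].first) return;
  float prev_right = i > 2 ? (*times)[i-3].second : 0.0f;
  float left = std::max(prev_right,
                        std::min((*times)[i-2].first, (*times)[i-1].first));
  float right = std::max((*times)[i-2].second, (*times)[i-1].second);
  float first_dur = (*times)[i-2].second - (*times)[i-2].first;
  float second_dur = (*times)[i-1].second - (*times)[i-1].first;
  float mid = left + (right - left) * first_dur / (first_dur + second_dur);
  (*times)[i-2] = std::make_pair(left, mid);
  (*times)[i-1] = std::make_pair(mid, right);
}

int main() {
  // Second word starts before the first one ends: [1.0, 2.0] vs [1.6, 2.4].
  std::vector<std::pair<float, float> > times;
  times.push_back(std::make_pair(1.0f, 2.0f));
  times.push_back(std::make_pair(1.6f, 2.4f));
  FixOverlap(&times);
  // Durations are 1.0 and 0.8, so the span [1.0, 2.4] splits at
  // 1.0 + 1.4 * (1.0 / 1.8) ~= 1.78, giving [1.00, 1.78] [1.78, 2.40].
  std::printf("[%.2f, %.2f] [%.2f, %.2f]\n",
              times[0].first, times[0].second,
              times[1].first, times[1].second);
  return 0;
}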
41 changes: 28 additions & 13 deletions src/lat/sausages.h
@@ -2,6 +2,7 @@

// Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
// 2015 Guoguo Chen
// 2019 Dogan Can

// See ../../COPYING for clarification regarding multiple authors
//
@@ -104,26 +105,35 @@ class MinimumBayesRisk {
return R_;
}

const std::vector<std::vector<std::pair<BaseFloat, BaseFloat> > > GetTimes() const {
return times_; // returns average (start,end) times for each word in each
// bin. These are raw averages without any processing, i.e. time intervals
// from different bins can overlap.
}

const std::vector<std::pair<BaseFloat, BaseFloat> > GetSausageTimes() const {
return times_; // returns average (start,end) times for each bin (each entry
// of GetSausageStats()). Note: if you want the times for the one best,
// you can work out the one best yourself from the sausage stats and get the times
// at the same time.
return sausage_times_; // returns average (start,end) times for each bin.
// This is typically the weighted average of the times in GetTimes() but can
// be slightly different if the times for the bins overlap, in which case
// the times returned by this method do not overlap unlike the times
// returned by GetTimes().
}

const std::vector<std::pair<BaseFloat, BaseFloat> > &GetOneBestTimes() const {
return one_best_times_; // returns average (start,end) times for each bin corresponding
// to an entry in the one-best output. This is just the appropriate
// subsequence of the times in SausageTimes().
return one_best_times_; // returns average (start,end) times for each word
// corresponding to an entry in the one-best output. This is typically the
// appropriate subset of the times in GetTimes() but can be slightly
// different if the times for the one-best words overlap, in which case
// the times returned by this method do not overlap unlike the times
// returned by GetTimes().
}

/// Outputs the confidences for the one-best transcript.
const std::vector<BaseFloat> &GetOneBestConfidences() const {
return one_best_confidences_;
}

/// Returns the expected WER over this sentence (assuming
/// model correctness.
/// Returns the expected WER over this sentence (assuming model correctness).
BaseFloat GetBayesRisk() const { return L_; }

const std::vector<std::vector<std::pair<int32, BaseFloat> > > &GetSausageStats() const {
@@ -222,15 +232,20 @@ class MinimumBayesRisk {
// paper. We sort in reverse order on the second member (posterior), so more
// likely word is first.

std::vector<std::pair<BaseFloat, BaseFloat> > times_;
std::vector<std::vector<std::pair<BaseFloat, BaseFloat> > > times_;
// The average start and end times for words in each confusion-network bin.
// This is like an average over arcs, of the tau_b and tau_e quantities in
Contributor:

This comment looks like it was copied from below. Is it correct?
The indexing is not clear to me (also the original code).

Contributor Author:

Yes, the comment is correct. What used to be times_ is now called sausage_times_.

times_          -> tau_b(q, w), tau_e(q, w)
sausage_times_  -> tau_b(q), tau_e(q)
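A small self-contained sketch of that mapping (hypothetical numbers, plain std::map in place of Kaldi's typedefs): each times_ entry divides the per-word accumulator by that word's posterior, while sausage_times_ sums the accumulators over the words in the bin; since the posteriors within a bin sum to 1, the sums are already the posterior-weighted bin times.

// Hypothetical illustration of tau_b(q, w) collapsing to tau_b(q).
#include <cstdio>
#include <map>

int main() {
  std::map<int, double> gamma_q;  // word -> posterior in bin q
  std::map<int, double> tau_b_q;  // word -> sum over arcs of start * posterior
  gamma_q[7] = 0.75; tau_b_q[7] = 0.75 * 1.20;  // word 7 starts near 1.20
  gamma_q[9] = 0.25; tau_b_q[9] = 0.25 * 1.40;  // word 9 starts near 1.40

  double bin_start = 0.0;
  for (std::map<int, double>::iterator it = tau_b_q.begin();
       it != tau_b_q.end(); ++it) {
    double word_start = it->second / gamma_q[it->first];  // a times_ entry
    std::printf("word %d: start %.2f\n", it->first, word_start);
    bin_start += it->second;  // sausage_times_ accumulates the raw sums
  }
  // 0.75 * 1.20 + 0.25 * 1.40 = 1.25, the weighted-average bin start.
  std::printf("bin: start %.2f\n", bin_start);
  return 0;
}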

// Appendix C of the paper. Indexed from zero, like gamma_ and R_.

std::vector<std::pair<BaseFloat, BaseFloat> > sausage_times_;
// The average start and end times for each confusion-network bin. This
// is like an average over words, of the tau_b and tau_e quantities in
// Appendix C of the paper. Indexed from zero, like gamma_ and R_.

std::vector<std::pair<BaseFloat, BaseFloat> > one_best_times_;
// one_best_times_ is a subsequence of times_, corresponding to
// (start,end) times of words in the one best output. Actually these
// times are averages over the bin that each word came from.
// The average start and end times for words in the one best output. This
// is like an average over the arcs, of the tau_b and tau_e quantities in
// Appendix C of the paper. Indexed from zero, like gamma_ and R_.

std::vector<BaseFloat> one_best_confidences_;
// vector of confidences for the 1-best output (which could be
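A hypothetical usage sketch of the accessors above, assuming a decoded CompactLattice is available (this helper is not part of the patch):

#include "base/kaldi-common.h"
#include "lat/sausages.h"

// Prints the MBR one-best words with their (start, end) frame times,
// which after this patch are guaranteed not to overlap.
void PrintMbrTimes(const kaldi::CompactLattice &clat) {
  kaldi::MinimumBayesRisk mbr(clat);
  const std::vector<kaldi::int32> &words = mbr.GetOneBest();
  const std::vector<std::pair<kaldi::BaseFloat, kaldi::BaseFloat> > &times =
      mbr.GetOneBestTimes();
  for (size_t i = 0; i < words.size(); i++)
    KALDI_LOG << "word " << words[i] << " from frame " << times[i].first
              << " to frame " << times[i].second;
}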