Merged
79 changes: 58 additions & 21 deletions src/lat/sausages.cc
@@ -2,6 +2,7 @@

// Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
// 2015 Guoguo Chen
// 2019 Dogan Can

// See ../../COPYING for clarification regarding multiple authors
//
@@ -52,7 +53,28 @@ void MinimumBayesRisk::MbrDecode() {
}
// build the outputs (time, confidences),
if (R_[q] != 0 || opts_.print_silence) {
one_best_times_.push_back(times_[q]);
one_best_times_.push_back(times_[q][0]);
size_t i = one_best_times_.size();
if (i > 1 && one_best_times_[i-2].second > one_best_times_[i-1].first) {
// It's quite possible for this to happen, but it seems like it would
// have a bad effect on the downstream processing, so we fix it here.
// We resolve overlaps by redistributing the available time interval.
BaseFloat prev_right = i > 2 ? one_best_times_[i-3].second : 0.0;
BaseFloat left = std::max(prev_right,
std::min(one_best_times_[i-2].first,
one_best_times_[i-1].first));
BaseFloat right = std::max(one_best_times_[i-2].second,
one_best_times_[i-1].second);
BaseFloat first_dur =
one_best_times_[i-2].second - one_best_times_[i-2].first;
BaseFloat second_dur =
one_best_times_[i-1].second - one_best_times_[i-1].first;
BaseFloat mid = left + (right - left) * first_dur /
(first_dur + second_dur);
one_best_times_[i-2].first = left;
one_best_times_[i-2].second = one_best_times_[i-1].first = mid;
one_best_times_[i-1].second = right;
}
BaseFloat confidence = 0.0;
for (int32 j = 0; j < gamma_[q].size(); j++)
if (gamma_[q][j].first == R_[q]) confidence = gamma_[q][j].second;
@@ -146,11 +168,11 @@ void MinimumBayesRisk::AccStats() {
std::vector<map<int32, double> > gamma(Q+1); // temp. form of gamma.
// index 1...Q [word] -> occ.

// The tau arrays below are the sums over words of the tau_b
// and tau_e timing quantities mentioned in Appendix C of
// the paper... we are using these to get averaged times for
// the sausage bins, not specifically for the 1-best output.
Vector<double> tau_b(Q+1), tau_e(Q+1);
// The tau maps below are the sums over arcs with the same word label
// of the tau_b and tau_e timing quantities mentioned in Appendix C of
// the paper... we are using these to get averaged times for both the
// sausage bins and the 1-best output.
std::vector<map<int32, double> > tau_b(Q+1), tau_e(Q+1);

double Ltmp = EditDistance(N, Q, alpha, alpha_dash, alpha_dash_arc);
if (L_ != 0 && Ltmp > L_) { // L_ != 0 is to rule out 1st iter.
@@ -190,8 +212,8 @@ void MinimumBayesRisk::AccStats() {
// next: gamma(q, w(a)) += beta_dash_arc(q)
AddToMap(w_a, beta_dash_arc(q), &(gamma[q]));
// next: accumulating times, see decl for tau_b,tau_e
tau_b(q) += state_times_[s_a] * beta_dash_arc(q);
tau_e(q) += state_times_[n] * beta_dash_arc(q);
AddToMap(w_a, state_times_[s_a] * beta_dash_arc(q), &(tau_b[q]));
AddToMap(w_a, state_times_[n] * beta_dash_arc(q), &(tau_e[q]));
break;
case 2:
beta_dash(s_a, q) += beta_dash_arc(q);
@@ -204,8 +226,8 @@ void MinimumBayesRisk::AccStats() {
// WARNING: there was an error in Appendix C. If we followed
// the instructions there the next line would say state_times_[sa], but
// it would be wrong. I will try to publish an erratum.
tau_b(q) += state_times_[n] * beta_dash_arc(q);
tau_e(q) += state_times_[n] * beta_dash_arc(q);
AddToMap(0, state_times_[n] * beta_dash_arc(q), &(tau_b[q]));
AddToMap(0, state_times_[n] * beta_dash_arc(q), &(tau_e[q]));
break;
default:
KALDI_ERR << "Invalid b_arc value"; // error in code.
@@ -222,8 +244,8 @@ void MinimumBayesRisk::AccStats() {
AddToMap(0, beta_dash_arc(q), &(gamma[q]));
// the statements below are actually redundant because
// state_times_[1] is zero.
tau_b(q) += state_times_[1] * beta_dash_arc(q);
tau_e(q) += state_times_[1] * beta_dash_arc(q);
AddToMap(0, state_times_[1] * beta_dash_arc(q), &(tau_b[q]));
AddToMap(0, state_times_[1] * beta_dash_arc(q), &(tau_e[q]));
}
for (int32 q = 1; q <= Q; q++) { // a check (line 35)
double sum = 0.0;
@@ -240,7 +262,8 @@ void MinimumBayesRisk::AccStats() {
for (int32 q = 1; q <= Q; q++) {
for (map<int32, double>::iterator iter = gamma[q].begin();
iter != gamma[q].end(); ++iter)
gamma_[q-1].push_back(std::make_pair(iter->first, static_cast<BaseFloat>(iter->second)));
gamma_[q-1].push_back(
std::make_pair(iter->first, static_cast<BaseFloat>(iter->second)));
// sort gamma_[q-1] from largest to smallest posterior.
GammaCompare comp;
std::sort(gamma_[q-1].begin(), gamma_[q-1].end(), comp);
@@ -250,18 +273,32 @@ void MinimumBayesRisk::AccStats() {
// indexing.
times_.clear();
times_.resize(Q);
sausage_times_.clear();
sausage_times_.resize(Q);
for (int32 q = 1; q <= Q; q++) {
times_[q-1].first = tau_b(q);
times_[q-1].second = tau_e(q);
if (times_[q-1].first > times_[q-1].second) // this is quite bad.
KALDI_WARN << "Times out of order";
if (q > 1 && times_[q-2].second > times_[q-1].first) {
double t_b = 0.0, t_e = 0.0;
for (std::vector<std::pair<int32, BaseFloat>>::iterator iter = gamma_[q-1].begin();
iter != gamma_[q-1].end(); ++iter) {
double w_b = tau_b[q][iter->first], w_e = tau_e[q][iter->first];
if (w_b > w_e)
KALDI_WARN << "Times out of order"; // this is quite bad.
times_[q-1].push_back(
std::make_pair(static_cast<BaseFloat>(w_b / iter->second),
static_cast<BaseFloat>(w_e / iter->second)));
t_b += w_b;
t_e += w_e;
}
sausage_times_[q-1].first = t_b;
sausage_times_[q-1].second = t_e;
if (sausage_times_[q-1].first > sausage_times_[q-1].second)
KALDI_WARN << "Times out of order"; // this is quite bad.
if (q > 1 && sausage_times_[q-2].second > sausage_times_[q-1].first) {
// We previously had a warning here, but now we'll just set both
// those values to their average. It's quite possible for this
// condition to happen, but it seems like it would have a bad effect
// on the downstream processing, so we fix it.
double avg = 0.5 * (times_[q-2].second + times_[q-1].first);
times_[q-2].second = times_[q-1].first = avg;
sausage_times_[q-2].second = sausage_times_[q-1].first =
0.5 * (sausage_times_[q-2].second + sausage_times_[q-1].first);
}
}
}
@@ -371,7 +408,7 @@ MinimumBayesRisk::MinimumBayesRisk(const CompactLattice &clat_in,
PrepareLatticeAndInitStats(&clat);

R_ = words;
times_ = times;
sausage_times_ = times;
L_ = 0.0;

MbrDecode();
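The overlap fix above splits the union of the two overlapping word intervals at a point proportional to their original durations, clamped so the first word never starts before the word preceding it ends. A minimal standalone sketch of that arithmetic (hypothetical names, and float in place of Kaldi's BaseFloat, so it compiles on its own):

// Standalone sketch of the interval-redistribution arithmetic in
// MbrDecode() above; names are hypothetical.
#include <algorithm>
#include <cstddef>
#include <cstdio>
#include <utility>
#include <vector>

// Resolve an overlap between the last two (start, end) intervals by
// splitting the available span [left, right] in proportion to the
// original durations.
void FixOverlap(std::vector<std::pair<float, float> > *times) {
  size_t i = times->size();
  if (i < 2 || (*times)[i-2].second <= (*times)[i-1].first) return;
  float prev_right = i > 2 ? (*times)[i-3].second : 0.0f;
  float left = std::max(prev_right,
                        std::min((*times)[i-2].first, (*times)[i-1].first));
  float right = std::max((*times)[i-2].second, (*times)[i-1].second);
  float first_dur = (*times)[i-2].second - (*times)[i-2].first;
  float second_dur = (*times)[i-1].second - (*times)[i-1].first;
  float mid = left + (right - left) * first_dur / (first_dur + second_dur);
  (*times)[i-2] = std::make_pair(left, mid);
  (*times)[i-1] = std::make_pair(mid, right);
}

int main() {
  // Second word starts before the first one ends: [1.0, 2.0] vs [1.6, 2.4].
  std::vector<std::pair<float, float> > times;
  times.push_back(std::make_pair(1.0f, 2.0f));
  times.push_back(std::make_pair(1.6f, 2.4f));
  FixOverlap(&times);
  // Durations are 1.0 and 0.8, so the span [1.0, 2.4] splits at
  // 1.0 + 1.4 * (1.0 / 1.8) ~= 1.78, giving [1.00, 1.78] [1.78, 2.40].
  std::printf("[%.2f, %.2f] [%.2f, %.2f]\n",
              times[0].first, times[0].second,
              times[1].first, times[1].second);
  return 0;
}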
41 changes: 28 additions & 13 deletions src/lat/sausages.h
@@ -2,6 +2,7 @@

// Copyright 2012 Johns Hopkins University (Author: Daniel Povey)
// 2015 Guoguo Chen
// 2019 Dogan Can

// See ../../COPYING for clarification regarding multiple authors
//
@@ -104,26 +105,35 @@ class MinimumBayesRisk {
return R_;
}

const std::vector<std::vector<std::pair<BaseFloat, BaseFloat> > > GetTimes() const {
return times_; // returns average (start,end) times for each word in each
// bin. These are raw averages without any processing, i.e. time intervals
// from different bins can overlap.
}

const std::vector<std::pair<BaseFloat, BaseFloat> > GetSausageTimes() const {
return times_; // returns average (start,end) times for each bin (each entry
// of GetSausageStats()). Note: if you want the times for the one best,
// you can work out the one best yourself from the sausage stats and get the times
// at the same time.
return sausage_times_; // returns average (start,end) times for each bin.
// This is typically the weighted average of the times in GetTimes() but can
// be slightly different if the times for the bins overlap, in which case
// the times returned by this method do not overlap unlike the times
// returned by GetTimes().
}

const std::vector<std::pair<BaseFloat, BaseFloat> > &GetOneBestTimes() const {
return one_best_times_; // returns average (start,end) times for each bin corresponding
// to an entry in the one-best output. This is just the appropriate
// subsequence of the times in SausageTimes().
return one_best_times_; // returns average (start,end) times for each word
// corresponding to an entry in the one-best output. This is typically the
// appropriate subset of the times in GetTimes() but can be slightly
// different if the times for the one-best words overlap, in which case
// the times returned by this method do not overlap unlike the times
// returned by GetTimes().
}

/// Outputs the confidences for the one-best transcript.
const std::vector<BaseFloat> &GetOneBestConfidences() const {
return one_best_confidences_;
}

/// Returns the expected WER over this sentence (assuming
/// model correctness.
/// Returns the expected WER over this sentence (assuming model correctness).
BaseFloat GetBayesRisk() const { return L_; }

const std::vector<std::vector<std::pair<int32, BaseFloat> > > &GetSausageStats() const {
@@ -222,15 +232,20 @@ class MinimumBayesRisk {
// paper. We sort in reverse order on the second member (posterior), so more
// likely word is first.

std::vector<std::pair<BaseFloat, BaseFloat> > times_;
std::vector<std::vector<std::pair<BaseFloat, BaseFloat> > > times_;
// The average start and end times for words in each confusion-network bin.
// This is like an average over arcs, of the tau_b and tau_e quantities in
Contributor:

This comment looks like it was copied from below. Is it correct?
The indexing is not clear to me (also the original code).

Contributor Author:

Yes, the comment is correct. What used to be times_ is now called sausage_times_.

times_          -> tau_b(q, w), tau_e(q, w)
sausage_times_  -> tau_b(q), tau_e(q)
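A small self-contained sketch of that mapping (hypothetical numbers, plain std::map in place of Kaldi's typedefs): each times_ entry divides the per-word accumulator by that word's posterior, while sausage_times_ sums the accumulators over the words in the bin; since the posteriors within a bin sum to 1, the sums are already the posterior-weighted bin times.

// Hypothetical illustration of tau_b(q, w) collapsing to tau_b(q).
#include <cstdio>
#include <map>

int main() {
  std::map<int, double> gamma_q;  // word -> posterior in bin q
  std::map<int, double> tau_b_q;  // word -> sum over arcs of start * posterior
  gamma_q[7] = 0.75; tau_b_q[7] = 0.75 * 1.20;  // word 7 starts near 1.20
  gamma_q[9] = 0.25; tau_b_q[9] = 0.25 * 1.40;  // word 9 starts near 1.40

  double bin_start = 0.0;
  for (std::map<int, double>::iterator it = tau_b_q.begin();
       it != tau_b_q.end(); ++it) {
    double word_start = it->second / gamma_q[it->first];  // a times_ entry
    std::printf("word %d: start %.2f\n", it->first, word_start);
    bin_start += it->second;  // sausage_times_ accumulates the raw sums
  }
  // 0.75 * 1.20 + 0.25 * 1.40 = 1.25, the weighted-average bin start.
  std::printf("bin: start %.2f\n", bin_start);
  return 0;
}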

// Appendix C of the paper. Indexed from zero, like gamma_ and R_.

std::vector<std::pair<BaseFloat, BaseFloat> > sausage_times_;
// The average start and end times for each confusion-network bin. This
// is like an average over words, of the tau_b and tau_e quantities in
// Appendix C of the paper. Indexed from zero, like gamma_ and R_.

std::vector<std::pair<BaseFloat, BaseFloat> > one_best_times_;
// one_best_times_ is a subsequence of times_, corresponding to
// (start,end) times of words in the one best output. Actually these
// times are averages over the bin that each word came from.
// The average start and end times for words in the one best output. This
// is like an average over the arcs, of the tau_b and tau_e quantities in
// Appendix C of the paper. Indexed from zero, like gamma_ and R_.

std::vector<BaseFloat> one_best_confidences_;
// vector of confidences for the 1-best output (which could be
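A hypothetical usage sketch of the accessors above, assuming a decoded CompactLattice is available (this helper is not part of the patch):

#include "base/kaldi-common.h"
#include "lat/sausages.h"

// Prints the MBR one-best words with their (start, end) frame times,
// which after this patch are guaranteed not to overlap.
void PrintMbrTimes(const kaldi::CompactLattice &clat) {
  kaldi::MinimumBayesRisk mbr(clat);
  const std::vector<kaldi::int32> &words = mbr.GetOneBest();
  const std::vector<std::pair<kaldi::BaseFloat, kaldi::BaseFloat> > &times =
      mbr.GetOneBestTimes();
  for (size_t i = 0; i < words.size(); i++)
    KALDI_LOG << "word " << words[i] << " from frame " << times[i].first
              << " to frame " << times[i].second;
}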