From 74068870835ecb855c78c98593d6b9d4f09ce24d Mon Sep 17 00:00:00 2001 From: Daniel Povey Date: Fri, 19 Jul 2019 18:25:48 -0400 Subject: [PATCH] [src] Fixes RE unusual topologies --- egs/wsj/s5/steps/nnet3/chain/gen_topo5.py | 22 ++++++++++------------ src/fstext/fstext-utils.h | 3 ++- src/hmm/hmm-utils.cc | 5 +++-- 3 files changed, 15 insertions(+), 15 deletions(-) diff --git a/egs/wsj/s5/steps/nnet3/chain/gen_topo5.py b/egs/wsj/s5/steps/nnet3/chain/gen_topo5.py index 1583966b58c..9df502545a5 100755 --- a/egs/wsj/s5/steps/nnet3/chain/gen_topo5.py +++ b/egs/wsj/s5/steps/nnet3/chain/gen_topo5.py @@ -2,6 +2,9 @@ # Copyright 2012 Johns Hopkins University (author: Daniel Povey) +# This script was modified around 11.11.2016, when the code was extended to +# support having a different pdf-class on the self loop. + # Generate a topology file. This allows control of the number of states in the # non-silence HMMs, and in the silence HMMs. This is a modified version of # 'utils/gen_topo.pl' that generates a different type of topology, one that we @@ -29,22 +32,17 @@ nonsilence_phones = [ int(x) for x in args.nonsilence_phones.split(":") ] all_phones = silence_phones + nonsilence_phones + print("") print("") print("") print(" ".join([str(x) for x in all_phones])) print("") -# state 0 is nonemitting -print(" 0 1 0.5 2 0.5 ") -# state 1 is for when we traverse it in 1 state -print(" 1 0 4 1.0 ") -# state 2 is for when we traverse it in >1 state, for the first state. -print(" 2 2 3 1.0 ") -# state 3 is for the self-loop. Use pdf-class 1 here so that the default -# phone-class clustering (which uses only pdf-class 1 by default) gets only -# stats from longer phones. -print(" 3 1 3 0.5 4 0.5 ") -print(" 4 ") +print("0 1 1 0.69314718055") +print("0 2 3 0.69314718055") +print("1 1 2 0.69314718055") +print("1 0.69314718055") +print("2 0.0") +print("") print("") print("") - diff --git a/src/fstext/fstext-utils.h b/src/fstext/fstext-utils.h index 25c4a53c633..b220dd59c62 100644 --- a/src/fstext/fstext-utils.h +++ b/src/fstext/fstext-utils.h @@ -259,7 +259,8 @@ void MakePrecedingInputSymbolsSame(bool start_is_epsilon, MutableFst *fst); /// As MakePrecedingInputSymbolsSame, but takes a functor object that maps -/// labels to (int32) classes +/// labels to (int32) classes. Caution: it must not map kNoLabel (-1) +/// to the same value as any real symbol. template void MakePrecedingInputSymbolsSameClass(bool start_is_epsilon, MutableFst *fst, const F &f); diff --git a/src/hmm/hmm-utils.cc b/src/hmm/hmm-utils.cc index 7acf8e8068c..7bd6070f151 100644 --- a/src/hmm/hmm-utils.cc +++ b/src/hmm/hmm-utils.cc @@ -381,7 +381,8 @@ class TidToSelfLoopMapper { // This maps valid transition-ids to transition states, and maps all other // symbols (i.e. epsilon symbols, disambig symbols, and symbols with values - // over 100000/kNontermBigNumber) to zero. Its point is to provide an + // over 100000/kNontermBigNumber) to zero. (and -1 == kNoLabel to -1). + // Its purpose is to provide an // equivalence class on labels that's relevant to what the self-loop will be // on the following state. TidToSelfLoopMapper(const Transitions &trans_model, @@ -397,7 +398,7 @@ class TidToSelfLoopMapper { KALDI_ERR << "AddSelfLoops: graph already has self-loops."; return trans_model_.InfoForTransitionId(tid).self_loop_transition_id; } else if (tid == fst::kNoLabel) { - return 0; + return -1; } else { // 0 or (presumably) disambiguation symbol. Map to zero int32 big_number = fst::kNontermBigNumber; // 1000000 if (tid != 0 && tid < big_number) {