diff --git a/scripts/rnnlm/lmrescore_pruned.sh b/scripts/rnnlm/lmrescore_pruned.sh index 506527f4f6b..a7190e813de 100755 --- a/scripts/rnnlm/lmrescore_pruned.sh +++ b/scripts/rnnlm/lmrescore_pruned.sh @@ -16,7 +16,7 @@ max_ngram_order=4 # Approximate the lattice-rescoring by limiting the max-ngram- # the same ngram history and this prevents the lattice from # exploding exponentially. Details of the n-gram approximation # method are described in section 2.3 of the paper - # http://www.danielpovey.com/files/2018_icassp_lattice_pruning.pdm + # http://www.danielpovey.com/files/2018_icassp_lattice_pruning.pdf max_arcs= # limit the max arcs in lattice while rescoring. E.g., 20000 acwt=0.1 @@ -26,6 +26,8 @@ normalize=false # If true, we add a normalization step to the output of the RNNL # as in our RNNLM setup, a properly trained network would automatically # have its normalization term close to 1. The details of this # could be found at http://www.danielpovey.com/files/2018_icassp_rnnlm.pdf +lattice_prune_beam=4 # Beam used in pruned lattice composition + # This option affects speed and how large the composed lattice may be # End configuration section. 
@@ -97,6 +99,7 @@ cp $indir/num_jobs $outdir $cmd JOB=1:$nj $outdir/log/rescorelm.JOB.log \ lattice-lmrescore-kaldi-rnnlm-pruned --lm-scale=$weight $special_symbol_opts \ + --lattice-compose-beam=$lattice_prune_beam \ --acoustic-scale=$acwt --max-ngram-order=$max_ngram_order $normalize_opt $max_arcs_opt \ $carpa_option $oldlm $word_embedding "$rnnlm_dir/final.raw" \ "ark:gunzip -c $indir/lat.JOB.gz|" "ark,t:|gzip -c>$outdir/lat.JOB.gz" || exit 1; diff --git a/src/lat/compose-lattice-pruned.cc b/src/lat/compose-lattice-pruned.cc index f37e72ba1af..d80db2f0f46 100644 --- a/src/lat/compose-lattice-pruned.cc +++ b/src/lat/compose-lattice-pruned.cc @@ -272,7 +272,14 @@ class PrunedCompactLatticeComposer { // BaseFloat expected_cost_offset; }; - + // This bool variable is initialized to false, and will be updated to true + // the first time a Final() function is called on the det_fst_. Then we will + // immediately call RecomputePruningInfo() so that the output_best_cost_ is + // changed from +inf to a finite value, to be used in beam search. This is the + // only time the RecomputePruningInfo() function is called manually; otherwise + // it always follows an automatic schedule based on the num-arcs of the output + // lattice. 
+ bool output_reached_final_; const ComposeLatticePrunedOptions &opts_; const CompactLattice &clat_in_; fst::DeterministicOnDemandFst *det_fst_; @@ -584,7 +591,7 @@ PrunedCompactLatticeComposer::PrunedCompactLatticeComposer( const ComposeLatticePrunedOptions &opts, const CompactLattice &clat_in, fst::DeterministicOnDemandFst *det_fst, - CompactLattice* composed_clat): + CompactLattice* composed_clat): output_reached_final_(false), opts_(opts), clat_in_(clat_in), det_fst_(det_fst), clat_out_(composed_clat), num_arcs_out_(0), @@ -697,6 +704,10 @@ void PrunedCompactLatticeComposer::ProcessQueueElement( double final_cost = ConvertToCost(final_lat_weight); if (final_cost < src_composed_state_info.backward_cost) src_composed_state_info.backward_cost = final_cost; + if (!output_reached_final_) { + output_reached_final_ = true; + RecomputePruningInfo(); + } } } else { // It really was an arc. This code is very complicated, so we make it its