diff --git a/scripts/rnnlm/lmrescore_pruned.sh b/scripts/rnnlm/lmrescore_pruned.sh index 506527f4f6b..a7190e813de 100755 --- a/scripts/rnnlm/lmrescore_pruned.sh +++ b/scripts/rnnlm/lmrescore_pruned.sh @@ -16,7 +16,7 @@ max_ngram_order=4 # Approximate the lattice-rescoring by limiting the max-ngram- # the same ngram history and this prevents the lattice from # exploding exponentially. Details of the n-gram approximation # method are described in section 2.3 of the paper - # http://www.danielpovey.com/files/2018_icassp_lattice_pruning.pdm + # http://www.danielpovey.com/files/2018_icassp_lattice_pruning.pdf max_arcs= # limit the max arcs in lattice while rescoring. E.g., 20000 acwt=0.1 @@ -26,6 +26,8 @@ normalize=false # If true, we add a normalization step to the output of the RNNL # as in our RNNLM setup, a properly trained network would automatically # have its normalization term close to 1. The details of this # could be found at http://www.danielpovey.com/files/2018_icassp_rnnlm.pdf +lattice_prune_beam=4 # Beam used in pruned lattice composition + # This option affects speed and how large the composed lattice may be # End configuration section. 
@@ -97,6 +99,7 @@ cp $indir/num_jobs $outdir $cmd JOB=1:$nj $outdir/log/rescorelm.JOB.log \ lattice-lmrescore-kaldi-rnnlm-pruned --lm-scale=$weight $special_symbol_opts \ + --lattice-compose-beam=$lattice_prune_beam \ --acoustic-scale=$acwt --max-ngram-order=$max_ngram_order $normalize_opt $max_arcs_opt \ $carpa_option $oldlm $word_embedding "$rnnlm_dir/final.raw" \ "ark:gunzip -c $indir/lat.JOB.gz|" "ark,t:|gzip -c>$outdir/lat.JOB.gz" || exit 1; diff --git a/src/lat/compose-lattice-pruned.cc b/src/lat/compose-lattice-pruned.cc index f37e72ba1af..d80db2f0f46 100644 --- a/src/lat/compose-lattice-pruned.cc +++ b/src/lat/compose-lattice-pruned.cc @@ -272,7 +272,14 @@ class PrunedCompactLatticeComposer { // BaseFloat expected_cost_offset; }; - + // This bool variable is initialized to false, and will be updated to true + // the first time a Final() function is called on the det_fst_. Then we will + // immediately call RecomputePruningInfo() so that the output_best_cost_ is + // changed from +inf to a finite value, to be used in beam search. This is the + // only time the RecomputePruningInfo() function is called manually; otherwise + // it always follows an automatic schedule based on the num-arcs of the output + // lattice. 
+ bool output_reached_final_; const ComposeLatticePrunedOptions &opts_; const CompactLattice &clat_in_; fst::DeterministicOnDemandFst *det_fst_; @@ -584,7 +591,7 @@ PrunedCompactLatticeComposer::PrunedCompactLatticeComposer( const ComposeLatticePrunedOptions &opts, const CompactLattice &clat_in, fst::DeterministicOnDemandFst *det_fst, - CompactLattice* composed_clat): + CompactLattice* composed_clat): output_reached_final_(false), opts_(opts), clat_in_(clat_in), det_fst_(det_fst), clat_out_(composed_clat), num_arcs_out_(0), @@ -697,6 +704,10 @@ void PrunedCompactLatticeComposer::ProcessQueueElement( double final_cost = ConvertToCost(final_lat_weight); if (final_cost < src_composed_state_info.backward_cost) src_composed_state_info.backward_cost = final_cost; + if (!output_reached_final_) { + output_reached_final_ = true; + RecomputePruningInfo(); + } } } else { // It really was an arc. This code is very complicated, so we make it its