diff --git a/egs/wsj/s5/utils/build_const_arpa_lm.sh b/egs/wsj/s5/utils/build_const_arpa_lm.sh
index ec067df0d39..51aca1bb2ad 100755
--- a/egs/wsj/s5/utils/build_const_arpa_lm.sh
+++ b/egs/wsj/s5/utils/build_const_arpa_lm.sh
@@ -34,8 +34,8 @@ mkdir -p $new_lang
 cp -r $old_lang/* $new_lang
 
 unk=`cat $new_lang/oov.int`
-bos=`grep -w "<s>" $new_lang/words.txt | awk '{print $2}'`
-eos=`grep "</s>" $new_lang/words.txt | awk '{print $2}'`
+bos=`grep "^<s>\s" $new_lang/words.txt | awk '{print $2}'`
+eos=`grep "^</s>\s" $new_lang/words.txt | awk '{print $2}'`
 if [[ -z $bos || -z $eos ]]; then
   echo "$0: <s> and </s> symbols are not in $new_lang/words.txt"
   exit 1