diff --git a/.gitignore b/.gitignore
index 910d5cb019d..4cf0fa4efa9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -83,6 +83,7 @@ GSYMS
 /tools/ATLAS/
 /tools/atlas3.8.3.tar.gz
 /tools/irstlm/
+/tools/mitlm/
 /tools/openfst
 /tools/openfst-1.3.2.tar.gz
 /tools/openfst-1.3.2/
diff --git a/egs/babel/s5d/conf/lang/404-georgian.FLP.official.conf b/egs/babel/s5d/conf/lang/404-georgian.FLP.official.conf
index a6b22de419f..9cd043716ce 100644
--- a/egs/babel/s5d/conf/lang/404-georgian.FLP.official.conf
+++ b/egs/babel/s5d/conf/lang/404-georgian.FLP.official.conf
@@ -75,8 +75,8 @@ unsup_data_list=./conf/lists/404-georgian/untranscribed-training.list
 unsup_nj=32
 
 
-lexicon_file=
-lexiconFlags="--romanized --oov "
+lexicon_file=/export/corpora/LDC/LDC2016S12/IARPA_BABEL_OP3_404/conversational/reference_materials/lexicon.txt
+lexiconFlags=" --romanized --oov "
 
 
 
diff --git a/egs/babel/s5d/local/make_L_align.sh b/egs/babel/s5d/local/make_L_align.sh
index 50e46a00493..41e9ff32958 100755
--- a/egs/babel/s5d/local/make_L_align.sh
+++ b/egs/babel/s5d/local/make_L_align.sh
@@ -34,18 +34,25 @@ tmpdir=$1
 dir=$2
 outdir=$3
 
+# Fail early, with a diagnostic on stderr, if a required input file is missing.
+for f in $dir/phones/optional_silence.txt $dir/phones.txt $dir/words.txt ; do
+  [ -f "$f" ] || { echo "$0: The file $f must exist!" >&2; exit 1; }
+done
+
 silphone=`cat $dir/phones/optional_silence.txt` || exit 1;
 
+if [ ! -f $tmpdir/lexicon.txt ] && [ ! -f $tmpdir/lexiconp.txt ] ; then
+  echo "$0: At least one of the files $tmpdir/lexicon.txt or $tmpdir/lexiconp.txt must exist" >&2
+  exit 1
+fi
+
 # Create lexicon with alignment info
 if [ -f $tmpdir/lexicon.txt ] ; then
   cat $tmpdir/lexicon.txt | \
    awk '{printf("%s #1 ", $1); for (n=2; n <= NF; n++) { printf("%s ", $n); } print "#2"; }'
-elif [ -f $tmpdir/lexiconp.txt ] ; then
+else
   cat $tmpdir/lexiconp.txt | \
    awk '{printf("%s #1 ", $1); for (n=3; n <= NF; n++) { printf("%s ", $n); } print "#2"; }'
-else
-  echo "Neither $tmpdir/lexicon.txt nor $tmpdir/lexiconp.txt does not exist"
-  exit 1
 fi | utils/make_lexicon_fst.pl - 0.5 $silphone | \
   fstcompile --isymbols=$dir/phones.txt --osymbols=$dir/words.txt \
   --keep_isymbols=false --keep_osymbols=false | \