diff --git a/egs/aspire/s5/local/fisher_prepare_dict.sh b/egs/aspire/s5/local/fisher_prepare_dict.sh index f643d924b26..577e2869c0b 100755 --- a/egs/aspire/s5/local/fisher_prepare_dict.sh +++ b/egs/aspire/s5/local/fisher_prepare_dict.sh @@ -122,10 +122,7 @@ srcdict=$srcdir/swb_ms98_transcriptions/sw-ms98-dict.text #(2a) Dictionary preparation: # Pre-processing (Upper-case, remove comments) -awk 'BEGIN{getline}($0 !~ /^#/) {$0=toupper($0); print}' \ - $srcdict | sort | awk '($0 !~ /^[:space:]*$/) {print}' \ - > $dir/lexicon1.txt || exit 1; - +grep -v '^#' $srcdict | tr '[a-z]' '[A-Z]' | awk 'NF>0' | sort > $dir/lexicon1.txt || exit 1; cat $dir/lexicon1.txt | awk '{ for(n=2;n<=NF;n++){ phones[$n] = 1; }} END{for (p in phones) print p;}' | \ grep -v SIL > $dir/nonsilence_phones.txt || exit 1; diff --git a/egs/fisher_english/s5/local/fisher_prepare_dict.sh b/egs/fisher_english/s5/local/fisher_prepare_dict.sh index bcf672cf057..c19cf5eeb7e 100755 --- a/egs/fisher_english/s5/local/fisher_prepare_dict.sh +++ b/egs/fisher_english/s5/local/fisher_prepare_dict.sh @@ -122,10 +122,7 @@ srcdict=$srcdir/swb_ms98_transcriptions/sw-ms98-dict.text #(2a) Dictionary preparation: # Pre-processing (Upper-case, remove comments) -awk 'BEGIN{getline}($0 !~ /^#/) {$0=toupper($0); print}' \ - $srcdict | sort | awk '($0 !~ /^[:space:]*$/) {print}' \ - > $dir/lexicon1.txt || exit 1; - +grep -v '^#' $srcdict | tr '[a-z]' '[A-Z]' | awk 'NF>0' | sort > $dir/lexicon1.txt || exit 1; cat $dir/lexicon1.txt | awk '{ for(n=2;n<=NF;n++){ phones[$n] = 1; }} END{for (p in phones) print p;}' | \ grep -v SIL > $dir/nonsilence_phones.txt || exit 1; diff --git a/egs/fisher_swbd/s5/local/fisher_prepare_dict.sh b/egs/fisher_swbd/s5/local/fisher_prepare_dict.sh index 8023c5c29f2..98b1e84cdde 100755 --- a/egs/fisher_swbd/s5/local/fisher_prepare_dict.sh +++ b/egs/fisher_swbd/s5/local/fisher_prepare_dict.sh @@ -116,11 +116,8 @@ cp $srcdict $dir/lexicon0.txt || exit 1; patch $dir/lexicon1_swbd.txt || exit 1; - +# Pre-processing (remove comments) +grep -v '^#' $dir/lexicon0.txt | awk 'NF>0' | sort > $dir/lexicon1_swbd.txt || exit 1; cat $dir/lexicon1_swbd.txt | awk '{ for(n=2;n<=NF;n++){ phones[$n] = 1; }} END{for (p in phones) print p;}' | \ grep -v SIL > $dir/nonsilence_phones_msu.txt || exit 1; diff --git a/egs/fisher_swbd/s5/local/fisher_swbd_prepare_dict.sh b/egs/fisher_swbd/s5/local/fisher_swbd_prepare_dict.sh index 2ca5dc31e9d..338e4f28a13 100755 --- a/egs/fisher_swbd/s5/local/fisher_swbd_prepare_dict.sh +++ b/egs/fisher_swbd/s5/local/fisher_swbd_prepare_dict.sh @@ -116,11 +116,8 @@ cp $srcdict $dir/lexicon0.txt || exit 1; patch $dir/lexicon1_swbd.txt || exit 1; - +# Pre-processing (remove comments) +grep -v '^#' $dir/lexicon0.txt | awk 'NF>0' | sort > $dir/lexicon1_swbd.txt || exit 1; cat $dir/lexicon1_swbd.txt | awk '{ for(n=2;n<=NF;n++){ phones[$n] = 1; }} END{for (p in phones) print p;}' | \ grep -v SIL > $dir/nonsilence_phones_msu.txt || exit 1; diff --git a/egs/fisher_swbd/s5/local/swbd1_prepare_dict.sh b/egs/fisher_swbd/s5/local/swbd1_prepare_dict.sh index 5ec0f35a10b..bedee1486a8 100755 --- a/egs/fisher_swbd/s5/local/swbd1_prepare_dict.sh +++ b/egs/fisher_swbd/s5/local/swbd1_prepare_dict.sh @@ -19,11 +19,8 @@ srcdict=$srcdir/swb_ms98_transcriptions/sw-ms98-dict.text [ ! -f "$srcdict" ] && echo "No such file $srcdict" && exit 1; #(2a) Dictionary preparation: -# Pre-processing (Upper-case, remove comments) -awk 'BEGIN{getline}($0 !~ /^#/) {$0=tolower($0); print}' \ - $srcdict | sort | awk '($0 !~ /^[:space:]*$/) {print}' \ - > $dir/lexicon1.txt || exit 1; - +# Pre-processing (Lower-case, remove comments) +grep -v '^#' $srcdict | tr '[A-Z]' '[a-z]' | awk 'NF>0' | sort > $dir/lexicon1.txt || exit 1; cat $dir/lexicon1.txt | awk '{ for(n=2;n<=NF;n++){ phones[$n] = 1; }} END{for (p in phones) print p;}' | \ grep -v sil > $dir/nonsilence_phones.txt || exit 1; diff --git a/egs/sre10/v1/local/dnn/fisher_prepare_dict.sh b/egs/sre10/v1/local/dnn/fisher_prepare_dict.sh index e1d726ef19c..bd41cc9724a 100755 --- a/egs/sre10/v1/local/dnn/fisher_prepare_dict.sh +++ b/egs/sre10/v1/local/dnn/fisher_prepare_dict.sh @@ -122,10 +122,7 @@ srcdict=$srcdir/swb_ms98_transcriptions/sw-ms98-dict.text #(2a) Dictionary preparation: # Pre-processing (Upper-case, remove comments) -awk 'BEGIN{getline}($0 !~ /^#/) {$0=toupper($0); print}' \ - $srcdict | sort | awk '($0 !~ /^[:space:]*$/) {print}' \ - > $dir/lexicon1.txt || exit 1; - +grep -v '^#' $srcdict | tr '[a-z]' '[A-Z]' | awk 'NF>0' | sort > $dir/lexicon1.txt || exit 1; cat $dir/lexicon1.txt | awk '{ for(n=2;n<=NF;n++){ phones[$n] = 1; }} END{for (p in phones) print p;}' | \ grep -v SIL > $dir/nonsilence_phones.txt || exit 1; diff --git a/egs/swbd/s5/local/swbd1_prepare_dict.sh b/egs/swbd/s5/local/swbd1_prepare_dict.sh index a91b555342f..3d147ff9224 100755 --- a/egs/swbd/s5/local/swbd1_prepare_dict.sh +++ b/egs/swbd/s5/local/swbd1_prepare_dict.sh @@ -19,11 +19,8 @@ srcdict=$srcdir/swb_ms98_transcriptions/sw-ms98-dict.text [ ! -f "$srcdict" ] && echo "No such file $srcdict" && exit 1; #(2a) Dictionary preparation: -# Pre-processing (Upper-case, remove comments) -awk 'BEGIN{getline}($0 !~ /^#/) {$0=tolower($0); print}' \ - $srcdict | sort | awk '($0 !~ /^[[:space:]]*$/) {print}' \ - > $dir/lexicon1.txt || exit 1; - +# Pre-processing (Lower-case, remove comments) +grep -v '^#' $srcdict | tr '[A-Z]' '[a-z]' | awk 'NF>0' | sort > $dir/lexicon1.txt || exit 1; cat $dir/lexicon1.txt | awk '{ for(n=2;n<=NF;n++){ phones[$n] = 1; }} END{for (p in phones) print p;}' | \ grep -v sil > $dir/nonsilence_phones.txt || exit 1; diff --git a/egs/swbd/s5/local/swbd_p1_prepare_dict.sh b/egs/swbd/s5/local/swbd_p1_prepare_dict.sh index fc19f1355d6..3e633d7b734 100755 --- a/egs/swbd/s5/local/swbd_p1_prepare_dict.sh +++ b/egs/swbd/s5/local/swbd_p1_prepare_dict.sh @@ -25,10 +25,7 @@ srcdict=$srcdir/swb_ms98_transcriptions/sw-ms98-dict.text #(2a) Dictionary preparation: # Pre-processing (Upper-case, remove comments) -awk 'BEGIN{getline}($0 !~ /^#/) {$0=toupper($0); print}' \ - $srcdict | sort | awk '($0 !~ /^[:space:]*$/) {print}' \ - > $dir/lexicon1.txt || exit 1; - +grep -v '^#' $srcdict | tr '[a-z]' '[A-Z]' | awk 'NF>0' | sort > $dir/lexicon1.txt || exit 1; cat $dir/lexicon1.txt | awk '{ for(n=2;n<=NF;n++){ phones[$n] = 1; }} END{for (p in phones) print p;}' | \ grep -v SIL > $dir/nonsilence_phones.txt || exit 1; diff --git a/egs/swbd/s5b/local/swbd1_prepare_dict.sh b/egs/swbd/s5b/local/swbd1_prepare_dict.sh index d860e5e0c2c..2fc7e08d7de 100755 --- a/egs/swbd/s5b/local/swbd1_prepare_dict.sh +++ b/egs/swbd/s5b/local/swbd1_prepare_dict.sh @@ -23,9 +23,7 @@ patch $dir/lexicon1.txt || exit 1; +grep -v '^#' $srcdict | tr '[A-Z]' '[a-z]' | awk 'NF>0' | sort > $dir/lexicon1.txt || exit 1; cat $dir/lexicon1.txt | awk '{ for(n=2;n<=NF;n++){ phones[$n] = 1; }} END{for (p in phones) print p;}' | \ diff --git a/egs/swbd/s5c/local/swbd1_prepare_dict.sh b/egs/swbd/s5c/local/swbd1_prepare_dict.sh index 5bd9abc6a77..673513806dc 100755 --- a/egs/swbd/s5c/local/swbd1_prepare_dict.sh +++ b/egs/swbd/s5c/local/swbd1_prepare_dict.sh @@ -22,10 +22,9 @@ cp $srcdict $dir/lexicon0.txt || exit 1; patch 0' | sort > $dir/lexicon1.txt || exit 1; - cat $dir/lexicon1.txt | awk '{ for(n=2;n<=NF;n++){ phones[$n] = 1; }} END{for (p in phones) print p;}' | \ grep -v sil > $dir/nonsilence_phones.txt || exit 1;