From f60300f1799b8e5652f7bedcc3b0f60111282999 Mon Sep 17 00:00:00 2001 From: Jinyi Yang Date: Wed, 26 Oct 2016 21:05:39 -0400 Subject: [PATCH 1/4] Fix gawk-specific features problem in prepare dict scripts --- egs/fisher_english/s5/local/fisher_prepare_dict.sh | 5 +---- egs/fisher_swbd/s5/local/fisher_prepare_dict.sh | 5 +---- egs/fisher_swbd/s5/local/fisher_swbd_prepare_dict.sh | 5 +---- egs/fisher_swbd/s5/local/swbd1_prepare_dict.sh | 5 +---- egs/sre10/v1/local/dnn/fisher_prepare_dict.sh | 5 +---- egs/swbd/s5/local/swbd1_prepare_dict.sh | 5 +---- egs/swbd/s5/local/swbd_p1_prepare_dict.sh | 5 +---- egs/swbd/s5b/local/swbd1_prepare_dict.sh | 4 +--- egs/swbd/s5c/local/swbd1_prepare_dict.sh | 3 +-- 9 files changed, 9 insertions(+), 33 deletions(-) diff --git a/egs/fisher_english/s5/local/fisher_prepare_dict.sh b/egs/fisher_english/s5/local/fisher_prepare_dict.sh index bcf672cf057..c19cf5eeb7e 100755 --- a/egs/fisher_english/s5/local/fisher_prepare_dict.sh +++ b/egs/fisher_english/s5/local/fisher_prepare_dict.sh @@ -122,10 +122,7 @@ srcdict=$srcdir/swb_ms98_transcriptions/sw-ms98-dict.text #(2a) Dictionary preparation: # Pre-processing (Upper-case, remove comments) -awk 'BEGIN{getline}($0 !~ /^#/) {$0=toupper($0); print}' \ - $srcdict | sort | awk '($0 !~ /^[:space:]*$/) {print}' \ - > $dir/lexicon1.txt || exit 1; - +grep -v '^#' $srcdict | tr '[a-z]' '[A-Z]' | awk 'NF>0' | sort > $dir/lexicon1.txt || exit 1; cat $dir/lexicon1.txt | awk '{ for(n=2;n<=NF;n++){ phones[$n] = 1; }} END{for (p in phones) print p;}' | \ grep -v SIL > $dir/nonsilence_phones.txt || exit 1; diff --git a/egs/fisher_swbd/s5/local/fisher_prepare_dict.sh b/egs/fisher_swbd/s5/local/fisher_prepare_dict.sh index 8023c5c29f2..984c5ec5437 100755 --- a/egs/fisher_swbd/s5/local/fisher_prepare_dict.sh +++ b/egs/fisher_swbd/s5/local/fisher_prepare_dict.sh @@ -117,10 +117,7 @@ patch $dir/lexicon1_swbd.txt || exit 1; - +grep -v '^#' $dir/lexicon0.txt | awk 'NF>0' | sort > $dir/lexicon1_swbd.txt || exit 1; cat $dir/lexicon1_swbd.txt | awk '{ for(n=2;n<=NF;n++){ phones[$n] = 1; }} END{for (p in phones) print p;}' | \ grep -v SIL > $dir/nonsilence_phones_msu.txt || exit 1; diff --git a/egs/fisher_swbd/s5/local/fisher_swbd_prepare_dict.sh b/egs/fisher_swbd/s5/local/fisher_swbd_prepare_dict.sh index 2ca5dc31e9d..5242c3dc4b4 100755 --- a/egs/fisher_swbd/s5/local/fisher_swbd_prepare_dict.sh +++ b/egs/fisher_swbd/s5/local/fisher_swbd_prepare_dict.sh @@ -117,10 +117,7 @@ patch $dir/lexicon1_swbd.txt || exit 1; - +grep -v '^#' $dir/lexicon0.txt | awk 'NF>0' | sort > $dir/lexicon1_swbd.txt || exit 1; cat $dir/lexicon1_swbd.txt | awk '{ for(n=2;n<=NF;n++){ phones[$n] = 1; }} END{for (p in phones) print p;}' | \ grep -v SIL > $dir/nonsilence_phones_msu.txt || exit 1; diff --git a/egs/fisher_swbd/s5/local/swbd1_prepare_dict.sh b/egs/fisher_swbd/s5/local/swbd1_prepare_dict.sh index 5ec0f35a10b..3c242aa7383 100755 --- a/egs/fisher_swbd/s5/local/swbd1_prepare_dict.sh +++ b/egs/fisher_swbd/s5/local/swbd1_prepare_dict.sh @@ -20,10 +20,7 @@ srcdict=$srcdir/swb_ms98_transcriptions/sw-ms98-dict.text #(2a) Dictionary preparation: # Pre-processing (Upper-case, remove comments) -awk 'BEGIN{getline}($0 !~ /^#/) {$0=tolower($0); print}' \ - $srcdict | sort | awk '($0 !~ /^[:space:]*$/) {print}' \ - > $dir/lexicon1.txt || exit 1; - +grep -v '^#' $srcdict | tr '[a-z]' '[A-Z]' | awk 'NF>0' | sort > $dir/lexicon1.txt || exit 1; cat $dir/lexicon1.txt | awk '{ for(n=2;n<=NF;n++){ phones[$n] = 1; }} END{for (p in phones) print p;}' | \ grep -v sil > $dir/nonsilence_phones.txt || exit 1; diff --git a/egs/sre10/v1/local/dnn/fisher_prepare_dict.sh b/egs/sre10/v1/local/dnn/fisher_prepare_dict.sh index e1d726ef19c..bd41cc9724a 100755 --- a/egs/sre10/v1/local/dnn/fisher_prepare_dict.sh +++ b/egs/sre10/v1/local/dnn/fisher_prepare_dict.sh @@ -122,10 +122,7 @@ srcdict=$srcdir/swb_ms98_transcriptions/sw-ms98-dict.text #(2a) Dictionary preparation: # Pre-processing (Upper-case, remove comments) -awk 'BEGIN{getline}($0 !~ /^#/) {$0=toupper($0); print}' \ - $srcdict | sort | awk '($0 !~ /^[:space:]*$/) {print}' \ - > $dir/lexicon1.txt || exit 1; - +grep -v '^#' $srcdict | tr '[a-z]' '[A-Z]' | awk 'NF>0' | sort > $dir/lexicon1.txt || exit 1; cat $dir/lexicon1.txt | awk '{ for(n=2;n<=NF;n++){ phones[$n] = 1; }} END{for (p in phones) print p;}' | \ grep -v SIL > $dir/nonsilence_phones.txt || exit 1; diff --git a/egs/swbd/s5/local/swbd1_prepare_dict.sh b/egs/swbd/s5/local/swbd1_prepare_dict.sh index a91b555342f..d31eab00970 100755 --- a/egs/swbd/s5/local/swbd1_prepare_dict.sh +++ b/egs/swbd/s5/local/swbd1_prepare_dict.sh @@ -20,10 +20,7 @@ srcdict=$srcdir/swb_ms98_transcriptions/sw-ms98-dict.text #(2a) Dictionary preparation: # Pre-processing (Upper-case, remove comments) -awk 'BEGIN{getline}($0 !~ /^#/) {$0=tolower($0); print}' \ - $srcdict | sort | awk '($0 !~ /^[[:space:]]*$/) {print}' \ - > $dir/lexicon1.txt || exit 1; - +grep -v '^#' $srcdict | tr '[a-z]' '[A-Z]' | awk 'NF>0' | sort > $dir/lexicon1.txt || exit 1; cat $dir/lexicon1.txt | awk '{ for(n=2;n<=NF;n++){ phones[$n] = 1; }} END{for (p in phones) print p;}' | \ grep -v sil > $dir/nonsilence_phones.txt || exit 1; diff --git a/egs/swbd/s5/local/swbd_p1_prepare_dict.sh b/egs/swbd/s5/local/swbd_p1_prepare_dict.sh index fc19f1355d6..3e633d7b734 100755 --- a/egs/swbd/s5/local/swbd_p1_prepare_dict.sh +++ b/egs/swbd/s5/local/swbd_p1_prepare_dict.sh @@ -25,10 +25,7 @@ srcdict=$srcdir/swb_ms98_transcriptions/sw-ms98-dict.text #(2a) Dictionary preparation: # Pre-processing (Upper-case, remove comments) -awk 'BEGIN{getline}($0 !~ /^#/) {$0=toupper($0); print}' \ - $srcdict | sort | awk '($0 !~ /^[:space:]*$/) {print}' \ - > $dir/lexicon1.txt || exit 1; - +grep -v '^#' $srcdict | tr '[a-z]' '[A-Z]' | awk 'NF>0' | sort > $dir/lexicon1.txt || exit 1; cat $dir/lexicon1.txt | awk '{ for(n=2;n<=NF;n++){ phones[$n] = 1; }} END{for (p in phones) print p;}' | \ grep -v SIL > $dir/nonsilence_phones.txt || exit 1; diff --git a/egs/swbd/s5b/local/swbd1_prepare_dict.sh b/egs/swbd/s5b/local/swbd1_prepare_dict.sh index d860e5e0c2c..92838ca02f4 100755 --- a/egs/swbd/s5b/local/swbd1_prepare_dict.sh +++ b/egs/swbd/s5b/local/swbd1_prepare_dict.sh @@ -23,9 +23,7 @@ patch $dir/lexicon1.txt || exit 1; +grep -v '^#' $srcdict | tr '[a-z]' '[A-Z]' | awk 'NF>0' | sort > $dir/lexicon1.txt || exit 1; cat $dir/lexicon1.txt | awk '{ for(n=2;n<=NF;n++){ phones[$n] = 1; }} END{for (p in phones) print p;}' | \ diff --git a/egs/swbd/s5c/local/swbd1_prepare_dict.sh b/egs/swbd/s5c/local/swbd1_prepare_dict.sh index 5bd9abc6a77..73e2510144e 100755 --- a/egs/swbd/s5c/local/swbd1_prepare_dict.sh +++ b/egs/swbd/s5c/local/swbd1_prepare_dict.sh @@ -22,10 +22,9 @@ cp $srcdict $dir/lexicon0.txt || exit 1; patch 0' | sort > $dir/lexicon1.txt || exit 1; - cat $dir/lexicon1.txt | awk '{ for(n=2;n<=NF;n++){ phones[$n] = 1; }} END{for (p in phones) print p;}' | \ grep -v sil > $dir/nonsilence_phones.txt || exit 1; From 6937f79b9cc2413355a1d54affa205072cb8eb80 Mon Sep 17 00:00:00 2001 From: Jinyi Yang Date: Wed, 26 Oct 2016 21:07:47 -0400 Subject: [PATCH 2/4] Fix gawk-specific features problem in prepare dict scripts --- egs/aspire/s5/local/fisher_prepare_dict.sh | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/egs/aspire/s5/local/fisher_prepare_dict.sh b/egs/aspire/s5/local/fisher_prepare_dict.sh index f643d924b26..577e2869c0b 100755 --- a/egs/aspire/s5/local/fisher_prepare_dict.sh +++ b/egs/aspire/s5/local/fisher_prepare_dict.sh @@ -122,10 +122,7 @@ srcdict=$srcdir/swb_ms98_transcriptions/sw-ms98-dict.text #(2a) Dictionary preparation: # Pre-processing (Upper-case, remove comments) -awk 'BEGIN{getline}($0 !~ /^#/) {$0=toupper($0); print}' \ - $srcdict | sort | awk '($0 !~ /^[:space:]*$/) {print}' \ - > $dir/lexicon1.txt || exit 1; - +grep -v '^#' $srcdict | tr '[a-z]' '[A-Z]' | awk 'NF>0' | sort > $dir/lexicon1.txt || exit 1; cat $dir/lexicon1.txt | awk '{ for(n=2;n<=NF;n++){ phones[$n] = 1; }} END{for (p in phones) print p;}' | \ grep -v SIL > $dir/nonsilence_phones.txt || exit 1; From ac0bde81e782369ebc5541554006b2a141151fca Mon Sep 17 00:00:00 2001 From: Jinyi Yang Date: Wed, 26 Oct 2016 21:24:25 -0400 Subject: [PATCH 3/4] Correct upper to lower case in dict preparation scripts --- egs/fisher_swbd/s5/local/swbd1_prepare_dict.sh | 2 +- egs/swbd/s5/local/swbd1_prepare_dict.sh | 2 +- egs/swbd/s5b/local/swbd1_prepare_dict.sh | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/egs/fisher_swbd/s5/local/swbd1_prepare_dict.sh b/egs/fisher_swbd/s5/local/swbd1_prepare_dict.sh index 3c242aa7383..1fa78f39f89 100755 --- a/egs/fisher_swbd/s5/local/swbd1_prepare_dict.sh +++ b/egs/fisher_swbd/s5/local/swbd1_prepare_dict.sh @@ -20,7 +20,7 @@ srcdict=$srcdir/swb_ms98_transcriptions/sw-ms98-dict.text #(2a) Dictionary preparation: # Pre-processing (Upper-case, remove comments) -grep -v '^#' $srcdict | tr '[a-z]' '[A-Z]' | awk 'NF>0' | sort > $dir/lexicon1.txt || exit 1; +grep -v '^#' $srcdict | tr '[A-Z]' '[a-z]' | awk 'NF>0' | sort > $dir/lexicon1.txt || exit 1; cat $dir/lexicon1.txt | awk '{ for(n=2;n<=NF;n++){ phones[$n] = 1; }} END{for (p in phones) print p;}' | \ grep -v sil > $dir/nonsilence_phones.txt || exit 1; diff --git a/egs/swbd/s5/local/swbd1_prepare_dict.sh b/egs/swbd/s5/local/swbd1_prepare_dict.sh index d31eab00970..a638a84fd5b 100755 --- a/egs/swbd/s5/local/swbd1_prepare_dict.sh +++ b/egs/swbd/s5/local/swbd1_prepare_dict.sh @@ -20,7 +20,7 @@ srcdict=$srcdir/swb_ms98_transcriptions/sw-ms98-dict.text #(2a) Dictionary preparation: # Pre-processing (Upper-case, remove comments) -grep -v '^#' $srcdict | tr '[a-z]' '[A-Z]' | awk 'NF>0' | sort > $dir/lexicon1.txt || exit 1; +grep -v '^#' $srcdict | tr '[A-Z]' '[a-z]' | awk 'NF>0' | sort > $dir/lexicon1.txt || exit 1; cat $dir/lexicon1.txt | awk '{ for(n=2;n<=NF;n++){ phones[$n] = 1; }} END{for (p in phones) print p;}' | \ grep -v sil > $dir/nonsilence_phones.txt || exit 1; diff --git a/egs/swbd/s5b/local/swbd1_prepare_dict.sh b/egs/swbd/s5b/local/swbd1_prepare_dict.sh index 92838ca02f4..2fc7e08d7de 100755 --- a/egs/swbd/s5b/local/swbd1_prepare_dict.sh +++ b/egs/swbd/s5b/local/swbd1_prepare_dict.sh @@ -23,7 +23,7 @@ patch 0' | sort > $dir/lexicon1.txt || exit 1; +grep -v '^#' $srcdict | tr '[A-Z]' '[a-z]' | awk 'NF>0' | sort > $dir/lexicon1.txt || exit 1; cat $dir/lexicon1.txt | awk '{ for(n=2;n<=NF;n++){ phones[$n] = 1; }} END{for (p in phones) print p;}' | \ From e938c69540dc7c7ce3161de55cba179a99fb34de Mon Sep 17 00:00:00 2001 From: Jinyi Yang Date: Wed, 26 Oct 2016 21:34:55 -0400 Subject: [PATCH 4/4] Remove useless comments(upper-case) --- egs/fisher_swbd/s5/local/fisher_prepare_dict.sh | 2 +- egs/fisher_swbd/s5/local/fisher_swbd_prepare_dict.sh | 2 +- egs/fisher_swbd/s5/local/swbd1_prepare_dict.sh | 2 +- egs/swbd/s5/local/swbd1_prepare_dict.sh | 2 +- egs/swbd/s5c/local/swbd1_prepare_dict.sh | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/egs/fisher_swbd/s5/local/fisher_prepare_dict.sh b/egs/fisher_swbd/s5/local/fisher_prepare_dict.sh index 984c5ec5437..98b1e84cdde 100755 --- a/egs/fisher_swbd/s5/local/fisher_prepare_dict.sh +++ b/egs/fisher_swbd/s5/local/fisher_prepare_dict.sh @@ -116,7 +116,7 @@ cp $srcdict $dir/lexicon0.txt || exit 1; patch 0' | sort > $dir/lexicon1_swbd.txt || exit 1; cat $dir/lexicon1_swbd.txt | awk '{ for(n=2;n<=NF;n++){ phones[$n] = 1; }} END{for (p in phones) print p;}' | \ diff --git a/egs/fisher_swbd/s5/local/fisher_swbd_prepare_dict.sh b/egs/fisher_swbd/s5/local/fisher_swbd_prepare_dict.sh index 5242c3dc4b4..338e4f28a13 100755 --- a/egs/fisher_swbd/s5/local/fisher_swbd_prepare_dict.sh +++ b/egs/fisher_swbd/s5/local/fisher_swbd_prepare_dict.sh @@ -116,7 +116,7 @@ cp $srcdict $dir/lexicon0.txt || exit 1; patch 0' | sort > $dir/lexicon1_swbd.txt || exit 1; cat $dir/lexicon1_swbd.txt | awk '{ for(n=2;n<=NF;n++){ phones[$n] = 1; }} END{for (p in phones) print p;}' | \ diff --git a/egs/fisher_swbd/s5/local/swbd1_prepare_dict.sh b/egs/fisher_swbd/s5/local/swbd1_prepare_dict.sh index 1fa78f39f89..bedee1486a8 100755 --- a/egs/fisher_swbd/s5/local/swbd1_prepare_dict.sh +++ b/egs/fisher_swbd/s5/local/swbd1_prepare_dict.sh @@ -19,7 +19,7 @@ srcdict=$srcdir/swb_ms98_transcriptions/sw-ms98-dict.text [ ! -f "$srcdict" ] && echo "No such file $srcdict" && exit 1; #(2a) Dictionary preparation: -# Pre-processing (Upper-case, remove comments) +# Pre-processing (Lower-case, remove comments) grep -v '^#' $srcdict | tr '[A-Z]' '[a-z]' | awk 'NF>0' | sort > $dir/lexicon1.txt || exit 1; cat $dir/lexicon1.txt | awk '{ for(n=2;n<=NF;n++){ phones[$n] = 1; }} END{for (p in phones) print p;}' | \ diff --git a/egs/swbd/s5/local/swbd1_prepare_dict.sh b/egs/swbd/s5/local/swbd1_prepare_dict.sh index a638a84fd5b..3d147ff9224 100755 --- a/egs/swbd/s5/local/swbd1_prepare_dict.sh +++ b/egs/swbd/s5/local/swbd1_prepare_dict.sh @@ -19,7 +19,7 @@ srcdict=$srcdir/swb_ms98_transcriptions/sw-ms98-dict.text [ ! -f "$srcdict" ] && echo "No such file $srcdict" && exit 1; #(2a) Dictionary preparation: -# Pre-processing (Upper-case, remove comments) +# Pre-processing (Lower-case, remove comments) grep -v '^#' $srcdict | tr '[A-Z]' '[a-z]' | awk 'NF>0' | sort > $dir/lexicon1.txt || exit 1; cat $dir/lexicon1.txt | awk '{ for(n=2;n<=NF;n++){ phones[$n] = 1; }} END{for (p in phones) print p;}' | \ diff --git a/egs/swbd/s5c/local/swbd1_prepare_dict.sh b/egs/swbd/s5c/local/swbd1_prepare_dict.sh index 73e2510144e..673513806dc 100755 --- a/egs/swbd/s5c/local/swbd1_prepare_dict.sh +++ b/egs/swbd/s5c/local/swbd1_prepare_dict.sh @@ -22,7 +22,7 @@ cp $srcdict $dir/lexicon0.txt || exit 1; patch 0' | sort > $dir/lexicon1.txt || exit 1; cat $dir/lexicon1.txt | awk '{ for(n=2;n<=NF;n++){ phones[$n] = 1; }} END{for (p in phones) print p;}' | \