Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 8 additions & 19 deletions egs/aishell2/s5/local/prepare_dict.sh
Original file line number Diff line number Diff line change
Expand Up @@ -10,15 +10,17 @@
# Default locations: where the DaCiDian repository is cloned to, and where the
# dictionary files are written.
download_dir=data/local/DaCiDian
dir=data/local/dict

# Require exactly one positional argument (<dict-dir>); otherwise print the
# usage message and exit with status 1.
# NOTE(review): the `if` header below appears twice with a single `fi`. This
# looks like the removed and added sides of a diff shown together (probably a
# whitespace-only change) -- confirm against the real file, where only one
# `if` line should exist.
if [ $# -ne 1 ]; then
if [ $# -ne 1 ]; then
echo "Usage: $0 <dict-dir>";
exit 1;
fi

# The command-line argument overrides the default value of dir assigned above.
dir=$1

# download the DaCiDian from github
# NOTE(review): two variants are visible here -- an unconditional `git clone`
# and one that runs only when $download_dir does not already exist as a
# directory. They look like the old and new sides of a diff; presumably only
# the guarded form is meant to remain -- confirm against the real file.
git clone https://github.com/aishell-foundation/DaCiDian.git $download_dir
if [ ! -d $download_dir ]; then
git clone https://github.com/aishell-foundation/DaCiDian.git $download_dir
fi

# here we map <UNK> to the phone spn(spoken noise)
# Create the output directory (and any missing parents) before writing into it.
mkdir -p $dir
Expand All @@ -27,31 +29,18 @@ echo -e "<UNK>\tspn" >> $dir/lexicon.txt

# prepare silence_phones.txt, nonsilence_phones.txt, optional_silence.txt, extra_questions.txt
#
# Build $dir/nonsilence_phones.txt from $dir/lexicon.txt:
#   1. awk treats fields 2..NF of every lexicon line as phones and prints each
#      distinct phone once (one per line).
#   2. perl drops the phone "sil" and prints groups of phones, one group per line.
#   3. The result is sorted and written to nonsilence_phones.txt; the script
#      exits with status 1 if the pipeline reports failure.
#
# NOTE(review): two perl programs are visible inside one quoted region and the
# first has no closing quote of its own -- this looks like the removed and
# added sides of a diff shown together; confirm which one the real file keeps.
#   * The multi-line program (my %ph_cl ...) keys its hash on the full phone
#     string, so every phone ends up on a line of its own.
#   * The compact program (while(<>) ...) requires each phone to be a run of
#     non-digit characters followed by optional digits, dies with "Bad phone"
#     otherwise, and keys on the non-digit prefix, so phones that differ only
#     in their trailing digits share one output line.
cat $dir/lexicon.txt | awk '{ for(n=2;n<=NF;n++){ phones[$n] = 1; }} END{for (p in phones) print p;}'| \
sort -u |\
perl -e '
my %ph_cl;
while (<STDIN>) {
$phone = $_;
chomp($phone);
chomp($_);
$phone = $_;
next if ($phone eq "sil");
if (exists $ph_cl{$phone}) { push(@{$ph_cl{$phone}}, $_) }
else { $ph_cl{$phone} = [$_]; }
}
foreach $key ( keys %ph_cl ) {
print "@{ $ph_cl{$key} }\n"
}
perl -e 'while(<>){ chomp($_); $phone = $_; next if ($phone eq "sil");
m:^([^\d]+)(\d*)$: || die "Bad phone $_"; $q{$1} .= "$phone "; }
foreach $l (values %q) {print "$l\n";}
' | sort -k1 > $dir/nonsilence_phones.txt || exit 1;

# "sil" is the only silence phone and is also the optional-silence phone.
echo sil > $dir/silence_phones.txt
echo sil > $dir/optional_silence.txt

# First line of extra_questions.txt: all silence phones on one line, space-separated.
cat $dir/silence_phones.txt | awk '{printf("%s ", $1);} END{printf "\n";}' > $dir/extra_questions.txt || exit 1;
# Then append one line per distinct trailing-digit suffix: every phone in
# nonsilence_phones.txt is split into a non-digit prefix and optional trailing
# digits, and phones sharing the same digits are printed together (phones with
# no digits share the empty-suffix line). A phone that does not fit that shape
# aborts with "Bad phone". The digits are presumably tone markers -- confirm.
# NOTE(review): two variants of the matching line are visible (old and new
# sides of a diff, presumably). They differ only in that the second puts a
# phone spelled literally `$0` in the empty-suffix group instead of the "0"
# group -- confirm which variant the real file keeps.
cat $dir/nonsilence_phones.txt | perl -e 'while(<>){ foreach $p (split(" ", $_)) {
$p =~ m:^([^\d]+)(\d*)$: || die "Bad phone $_"; $q{$2} .= "$p "; } } foreach $l (values %q) {print "$l\n";}' \
$p =~ m:^([^\d]+)(\d*)$: || die "Bad phone $_"; if($p eq "\$0"){$q{""} .= "$p ";}else{$q{$2} .= "$p ";} } } foreach $l (values %q) {print "$l\n";}' \
>> $dir/extra_questions.txt || exit 1;

# Report success and exit with status 0.
echo "local/prepare_dict.sh succeeded"
exit 0;