Merged
Changes from all commits
47 commits
76dcd08
initial commit of yomdle farsi
Sep 13, 2018
0dee6e1
added README
Sep 13, 2018
dadb232
added some more comments
Sep 13, 2018
317b5b8
added option to use utf8 to prepend words
Sep 13, 2018
8ecd648
changed normalized scoring to use data/test/text.old for ref files. A…
Sep 18, 2018
18970f0
adding normalization scripts to local/wer_output_filter
Sep 19, 2018
56f8dad
merged upstream and fixed conflicts with utils/lang/bpe/prepend_words…
Sep 19, 2018
d74410d
minor bug fix
Sep 19, 2018
97d23e5
initial commit for yomdle_zh
Sep 19, 2018
98cbe82
forgot to flip augment data
Sep 19, 2018
e3cb43e
fixed problems with nbsp and ideographic space
Sep 19, 2018
e2d5e84
Merge remote-tracking branch 'ChunChiehChang/yomdle2' into yomdle
Sep 20, 2018
98d538f
add changjie mapping
Sep 20, 2018
4d5d221
Merge remote-tracking branch 'ChunChiehChang/yomdle2' into yomdle
Sep 20, 2018
fe7e607
decrease number of leaves and minibatch size
Sep 21, 2018
a45b94b
added results to top of script and fixed bug in run_end2end.sh
Sep 24, 2018
b82b9a1
Merge remote-tracking branch 'ChunChiehChang/yomdle2' into yomdle
Sep 24, 2018
9d3156d
fixed minor bug
Sep 24, 2018
5dca66a
removed unused local/normalized_scoring and unused commented out code
Oct 1, 2018
e0af59e
modified README
Oct 1, 2018
65a18fe
changed file names
Oct 1, 2018
6679a9d
added examples to gedi2csv and yomdle2csv scripts. Also added code to…
Oct 1, 2018
654992b
removed unused comment
Oct 1, 2018
cc87549
minor change
Oct 1, 2018
36c2f52
minor change to handle cases where <s> appears in text
Oct 1, 2018
1736397
Merge remote-tracking branch 'upstream/master' into yomdle2
Oct 15, 2018
4cebf12
fix minor changes
Oct 15, 2018
3a2a88d
merged yomdle into yomdle2
Oct 15, 2018
cacada4
don't use gpu for align
Oct 15, 2018
3f9135e
adding bidi script to bpe, this is an alternative to /utils/lang/bpe/…
Oct 16, 2018
ae7b805
Merge remote-tracking branch 'upstream/master' into yomdle2
Nov 2, 2018
82b9c9e
normalize extra lm data and add extra chinese character to lexicon
Nov 7, 2018
38e584a
Merge remote-tracking branch 'upstream/master'
Nov 8, 2018
a5f2e35
Merge remote-tracking branch 'upstream/master'
Jan 16, 2019
a3464d0
mitigating some overflow error with floats
Jan 16, 2019
49f25dd
Merge remote-tracking branch 'upstream/master'
Feb 21, 2019
69a1c32
minor bug fixes. Syntax error in uw3/v1/local/process_data.py and iam…
Feb 21, 2019
59f3d41
Merge branch 'master' of github.com:ChunChiehChang/kaldi
Feb 21, 2019
f6fa20f
initial commit for casia egs
Mar 5, 2019
4a13a81
adding more files and changing data prep
Mar 5, 2019
7db581c
minor bug fixes
Mar 7, 2019
7aef07b
adding different topologies for grapheme pieces and bpe on decomposition
Mar 11, 2019
433e3f1
adding gen_topo.py file to generate topologies for different phonemes
Mar 11, 2019
5136329
updating results and small bugfix to decomposing Chinese characters
Apr 29, 2019
c75dacd
Merge remote-tracking branch 'upstream/master' into yomdle2
Apr 30, 2019
2a19515
Merge remote-tracking branch 'ChunChiehChang/casia' into yomdle2
May 1, 2019
708e8c3
Merge remote-tracking branch 'upstream/master' into yomdle2
Aug 20, 2019
13 changes: 13 additions & 0 deletions egs/casia_hwdb/v1/cmd.sh
@@ -0,0 +1,13 @@
# you can change cmd.sh depending on what type of queue you are using.
# If you have no queueing system and want to run on a local machine, you
# can change all instances of 'queue.pl' to 'run.pl' (but be careful and run
# commands one by one: most recipes will exhaust the memory on your
# machine). queue.pl works with GridEngine (qsub). slurm.pl works
# with slurm. Different queues are configured differently, with different
# queue names and different ways of specifying things like memory;
# to account for these differences you can create and edit the file
# conf/queue.conf to match your queue's configuration. Search for
# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.

export cmd="queue.pl"
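For reference, a hedged sketch of the alternative settings mentioned in the comments above; the exact options (such as memory limits) depend on your cluster and on conf/queue.conf:

# export cmd="run.pl"             # no queueing system: run everything locally
# export cmd="slurm.pl --mem 2G"  # Slurm cluster (see utils/slurm.pl and conf/queue.conf)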
1 change: 1 addition & 0 deletions egs/casia_hwdb/v1/image
37 changes: 37 additions & 0 deletions egs/casia_hwdb/v1/local/augment_data.sh
@@ -0,0 +1,37 @@
#!/bin/bash
# Copyright 2018 Hossein Hadian
# 2018 Ashish Arora

# Apache 2.0
# This script performs data augmentation.

nj=4
cmd=run.pl
feat_dim=40
fliplr=false
verticle_shift=0
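# fliplr flips images left-to-right and verticle_shift shifts them vertically;
# both options are passed through to local/extract_features.sh below.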
echo "$0 $@"

. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh || exit 1;

srcdir=$1
outdir=$2
datadir=$3

mkdir -p $datadir/augmentations
echo "copying $srcdir to $datadir/augmentations/aug1, allowed length, creating feats.scp"

for set in aug1; do
image/copy_data_dir.sh --spk-prefix $set- --utt-prefix $set- \
$srcdir $datadir/augmentations/$set
cat $srcdir/allowed_lengths.txt > $datadir/augmentations/$set/allowed_lengths.txt
local/extract_features.sh --nj $nj --cmd "$cmd" --feat-dim $feat_dim \
--vertical-shift $verticle_shift \
--fliplr $fliplr --augment 'random_scale' $datadir/augmentations/$set
done

echo " combine original data and data from different augmentations"
utils/combine_data.sh --extra-files images.scp $outdir $srcdir $datadir/augmentations/aug1
cat $srcdir/allowed_lengths.txt > $outdir/allowed_lengths.txt
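For illustration, a hedged sketch of how this script might be invoked from the recipe's run.sh; the directory names here are assumptions, not taken from this diff:

# local/augment_data.sh --nj 30 --cmd "$cmd" --feat-dim 40 data/train data/train_aug data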
67 changes: 67 additions & 0 deletions egs/casia_hwdb/v1/local/chain/compare_wer.sh
@@ -0,0 +1,67 @@
#!/bin/bash

# this script is used for comparing decoding results between systems.
# e.g. local/chain/compare_wer.sh exp/chain/cnn{1a,1b}

# Copyright 2017 Chun Chieh Chang
# 2017 Ashish Arora

if [ $# == 0 ]; then
echo "Usage: $0: <dir1> [<dir2> ... ]"
echo "e.g.: $0 exp/chain/cnn{1a,1b}"
exit 1
fi

echo "# $0 $*"
used_epochs=false

echo -n "# System "
for x in $*; do printf "% 10s" " $(basename $x)"; done
echo

echo -n "# WER "
for x in $*; do
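# best_wer holds a line like "%WER <value> [ ... ]"; awk picks out field 2, the WER value.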
wer=$(cat $x/decode_test/scoring_kaldi/best_wer | awk '{print $2}')
printf "% 10s" $wer
done
echo

echo -n "# CER "
for x in $*; do
cer=$(cat $x/decode_test/scoring_kaldi/best_cer | awk '{print $2}')
printf "% 10s" $cer
done
echo


if $used_epochs; then
exit 0; # the diagnostics aren't comparable between regular and discriminatively trained systems.
fi

echo -n "# Final train prob "
for x in $*; do
prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -v xent | awk '{printf("%.4f", $8)}')
printf "% 10s" $prob
done
echo

echo -n "# Final valid prob "
for x in $*; do
prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -v xent | awk '{printf("%.4f", $8)}')
printf "% 10s" $prob
done
echo

echo -n "# Final train prob (xent) "
for x in $*; do
prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -w xent | awk '{printf("%.4f", $8)}')
printf "% 10s" $prob
done
echo

echo -n "# Final valid prob (xent) "
for x in $*; do
prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -w xent | awk '{printf("%.4f", $8)}')
printf "% 10s" $prob
done
echo
231 changes: 231 additions & 0 deletions egs/casia_hwdb/v1/local/chain/run_cnn_e2eali_1b.sh
@@ -0,0 +1,231 @@
#!/bin/bash

# e2eali_1b is the same as chainali_1a but uses the e2e chain model to get the
# lattice alignments and to build a tree

# ./local/chain/compare_wer.sh exp_yomdle_chinese/chain/e2e_cnn_1a exp_yomdle_chinese/chain/cnn_e2eali_1b
# System e2e_cnn_1a cnn_e2eali_1b
# CER 15.44 13.57
# Final train prob 0.0616 -0.0512
# Final valid prob 0.0390 -0.0718
# Final train prob (xent) -0.6199
# Final valid prob (xent) -0.7448

set -e -o pipefail

data_dir=data
exp_dir=exp

stage=0

nj=30
train_set=train
nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium.
affix=_1b # affix for the chain model directory, e.g. "1a" or "1b", in case we change the configuration.
common_egs_dir=
reporting_email=

# chain options
train_stage=-10
xent_regularize=0.1
frame_subsampling_factor=4
# training chunk-options
chunk_width=340,300,200,100
num_leaves=2000
# we don't need extra left/right context for TDNN systems.
chunk_left_context=0
chunk_right_context=0
tdnn_dim=450
# training options
srand=0
remove_egs=true
lang_test=lang_test
# End configuration section.
echo "$0 $@" # Print the command line for logging


. ./cmd.sh
. ./path.sh
. ./utils/parse_options.sh


if ! cuda-compiled; then
cat <<EOF && exit 1
This script is intended to be used with GPUs but you have not compiled Kaldi with CUDA
If you want to use GPUs (and have them), go to src/, and configure and make on a machine
where "nvcc" is installed.
EOF
fi

e2echain_model_dir=$exp_dir/chain/e2e_cnn_1a
ali_dir=$exp_dir/chain/e2e_ali_train
lat_dir=$exp_dir/chain${nnet3_affix}/e2e_${train_set}_lats
dir=$exp_dir/chain${nnet3_affix}/cnn_e2eali${affix}
train_data_dir=$data_dir/${train_set}
tree_dir=$exp_dir/chain${nnet3_affix}/tree_e2e

# the 'lang' directory is created by this script.
# If you create such a directory with a non-standard topology
# you should probably name it differently.
lang=$data_dir/lang_chain
for f in $train_data_dir/feats.scp $ali_dir/ali.1.gz $ali_dir/final.mdl; do
[ ! -f $f ] && echo "$0: expected file $f to exist" && exit 1
done


if [ $stage -le 1 ]; then
echo "$0: creating lang directory $lang with chain-type topology"
cp -r $data_dir/lang $lang
silphonelist=$(cat $lang/phones/silence.csl) || exit 1;
nonsilphonelist=$(cat $lang/phones/nonsilence.csl) || exit 1;
# Use our special topology... note that later on we may have to tune this
# topology.
steps/nnet3/chain/gen_topo.py $nonsilphonelist $silphonelist >$lang/topo
fi

if [ $stage -le 2 ]; then
# Get the alignments as lattices (gives the chain training more freedom).
# use the same num-jobs as the alignments
steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \
--acoustic-scale 1.0 \
--scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \
${train_data_dir} $data_dir/lang $e2echain_model_dir $lat_dir
echo "" >$lat_dir/splice_opts

fi

if [ $stage -le 3 ]; then
# Build a tree using our new topology. We know we have alignments for the
# speed-perturbed data (local/nnet3/run_ivector_common.sh made them), so use
# those. The num-leaves is always somewhat less than the num-leaves from
# the GMM baseline.
if [ -f $tree_dir/final.mdl ]; then
echo "$0: $tree_dir/final.mdl already exists, refusing to overwrite it."
fi

steps/nnet3/chain/build_tree.sh \
--frame-subsampling-factor $frame_subsampling_factor \
--alignment-subsampling-factor 1 \
--context-opts "--context-width=2 --central-position=1" \
--cmd "$cmd" $num_leaves ${train_data_dir} \
$lang $ali_dir $tree_dir
fi


if [ $stage -le 4 ]; then
mkdir -p $dir
echo "$0: creating neural net configs using the xconfig parser";
num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}')
learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python)
cnn_opts="l2-regularize=0.075"
tdnn_opts="l2-regularize=0.075"
output_opts="l2-regularize=0.1"
common1="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=32"
common2="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=128"
common3="$cnn_opts required-time-offsets= height-offsets=-1,0,1 num-filters-out=512"
mkdir -p $dir/configs
cat <<EOF > $dir/configs/network.xconfig
input dim=180 name=input

conv-relu-batchnorm-layer name=cnn1 height-in=60 height-out=60 time-offsets=-3,-2,-1,0,1,2,3 $common1
conv-relu-batchnorm-layer name=cnn2 height-in=60 height-out=60 time-offsets=-3,-2,-1,0,1,2,3 $common1
conv-relu-batchnorm-layer name=cnn3 height-in=60 height-out=30 time-offsets=-2,-1,0,1,2 $common1 height-subsample-out=2
conv-relu-batchnorm-layer name=cnn4 height-in=30 height-out=30 time-offsets=-4,-2,0,2,4 $common2
conv-relu-batchnorm-layer name=cnn5 height-in=30 height-out=30 time-offsets=-4,-2,0,2,4 $common2
conv-relu-batchnorm-layer name=cnn6 height-in=30 height-out=15 time-offsets=-4,-2,0,2,4 $common2 height-subsample-out=2
conv-relu-batchnorm-layer name=cnn7 height-in=15 height-out=15 time-offsets=-4,0,4 $common3
conv-relu-batchnorm-layer name=cnn8 height-in=15 height-out=15 time-offsets=-4,0,4 $common3
conv-relu-batchnorm-layer name=cnn9 height-in=15 height-out=15 time-offsets=-4,0,4 $common3
relu-batchnorm-layer name=tdnn1 input=Append(-8,-4,0,4,8) dim=$tdnn_dim $tdnn_opts
relu-batchnorm-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts
relu-batchnorm-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts

## adding the layers for chain branch
relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 $tdnn_opts
output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 $output_opts

# adding the layers for xent branch
# This block prints the configs for a separate output that will be
# trained with a cross-entropy objective in the 'chain' models... this
# has the effect of regularizing the hidden parts of the model. we use
# 0.5 / args.xent_regularize as the learning rate factor- the factor of
# 0.5 / args.xent_regularize is suitable as it means the xent
# final-layer learns at a rate independent of the regularization
# constant; and the 0.5 was tuned so as to make the relative progress
# similar in the xent and regular final layers.
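# For example, with the default xent_regularize=0.1 set above, the factor comes out to 0.5/0.1 = 5.0.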
relu-batchnorm-layer name=prefinal-xent input=tdnn3 dim=$tdnn_dim target-rms=0.5 $tdnn_opts
output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 $output_opts
EOF
steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
fi


if [ $stage -le 5 ]; then
if [[ $(hostname -f) == *.clsp.jhu.edu ]] && [ ! -d $dir/egs/storage ]; then
utils/create_split_dir.pl \
/export/b0{3,4,5,6}/$USER/kaldi-data/egs/iam-$(date +'%m_%d_%H_%M')/s5/$dir/egs/storage $dir/egs/storage
fi

steps/nnet3/chain/train.py --stage=$train_stage \
--cmd="$cmd" \
--feat.cmvn-opts="--norm-means=false --norm-vars=false" \
--chain.xent-regularize $xent_regularize \
--chain.leaky-hmm-coefficient=0.1 \
--chain.l2-regularize=0.00005 \
--chain.apply-deriv-weights=false \
--chain.lm-opts="--ngram-order=2 --no-prune-ngram-order=1 --num-extra-lm-states=1500" \
--chain.frame-subsampling-factor=$frame_subsampling_factor \
--chain.alignment-subsampling-factor=1 \
--chain.left-tolerance 3 \
--chain.right-tolerance 3 \
--trainer.srand=$srand \
--trainer.max-param-change=2.0 \
--trainer.num-epochs=6 \
--trainer.frames-per-iter=1000000 \
--trainer.optimization.num-jobs-initial=4 \
--trainer.optimization.num-jobs-final=8 \
--trainer.optimization.initial-effective-lrate=0.001 \
--trainer.optimization.final-effective-lrate=0.0001 \
--trainer.optimization.shrink-value=1.0 \
--trainer.num-chunk-per-minibatch=16,8 \
--trainer.optimization.momentum=0.0 \
--egs.chunk-width=$chunk_width \
--egs.chunk-left-context=$chunk_left_context \
--egs.chunk-right-context=$chunk_right_context \
--egs.chunk-left-context-initial=0 \
--egs.chunk-right-context-final=0 \
--egs.dir="$common_egs_dir" \
--egs.opts="--frames-overlap-per-eg 0 --constrained false" \
--cleanup.remove-egs=$remove_egs \
--use-gpu=wait \
--reporting.email="$reporting_email" \
--feat-dir=$train_data_dir \
--tree-dir=$tree_dir \
--lat-dir=$lat_dir \
--dir=$dir || exit 1;
fi

if [ $stage -le 6 ]; then
# The reason we are using data/lang here, instead of $lang, is just to
# emphasize that it's not actually important to give mkgraph.sh the
# lang directory with the matched topology (since it gets the
# topology file from the model). So you could give it a different
# lang directory, one that contained a wordlist and LM of your choice,
# as long as phones.txt was compatible.

utils/mkgraph.sh \
--self-loop-scale 1.0 $data_dir/$lang_test \
$dir $dir/graph || exit 1;
fi

if [ $stage -le 7 ]; then
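# chunk_width is a comma-separated list (340,300,200,100 by default); decoding uses its first value.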
frames_per_chunk=$(echo $chunk_width | cut -d, -f1)
steps/nnet3/decode.sh --acwt 1.0 --post-decode-acwt 10.0 \
--extra-left-context $chunk_left_context \
--extra-right-context $chunk_right_context \
--extra-left-context-initial 0 \
--extra-right-context-final 0 \
--frames-per-chunk $frames_per_chunk \
--nj $nj --cmd "$cmd" \
$dir/graph $data_dir/test $dir/decode_test || exit 1;
fi