6 changes: 3 additions & 3 deletions egs/iam/v1/local/chain/run_cnn_1a.sh
@@ -89,15 +89,15 @@ if [ $stage -le 1 ]; then
# topo file. [note, it really has two states.. the first one is only repeated
# once, the second one has zero or more repeats.]
if [ -d $lang ]; then
if [ $lang/L.fst -nt data/$lang_test/L.fst ]; then
if [ $lang/L.fst -nt data/lang/L.fst ]; then
echo "$0: $lang already exists, not overwriting it; continuing"
else
echo "$0: $lang already exists and seems to be older than data/lang..."
echo " ... not sure what to do. Exiting."
exit 1;
fi
else
cp -r data/$lang_test $lang
cp -r data/lang $lang
silphonelist=$(cat $lang/phones/silence.csl) || exit 1;
nonsilphonelist=$(cat $lang/phones/nonsilence.csl) || exit 1;
# Use our special topology... note that later on may have to tune this
@@ -110,7 +110,7 @@ if [ $stage -le 2 ]; then
# Get the alignments as lattices (gives the chain training more freedom).
# use the same num-jobs as the alignments
steps/align_fmllr_lats.sh --nj $nj --cmd "$cmd" ${train_data_dir} \
data/$lang_test $gmm_dir $lat_dir
data/lang $gmm_dir $lat_dir
rm $lat_dir/fsts.*.gz # save space
fi

23 changes: 4 additions & 19 deletions egs/iam/v1/local/chain/run_cnn_chainali_1a.sh
@@ -2,21 +2,6 @@

# chainali_1a is as 1a except it uses chain alignments (using 1a system) instead of gmm alignments

# ./local/chain/compare_wer.sh exp/chain/cnn_chainali_1a/ exp/chain/cnn_1a/
# System cnn_chainali_1a cnn_1a
# WER 6.69 9.13
# Final train prob -0.0128 -0.0297
# Final valid prob -0.0447 -0.0975
# Final train prob (xent) -0.6448 -0.5915
# Final valid prob (xent) -0.9924 -1.0022

# steps/info/chain_dir_info.pl exp/chain/cnn_chainali_1a/
# exp/chain/cnn_chainali_1a/: num-iters=21 nj=2..4 num-params=4.4M dim=40->364 combine=-0.002->0.000 xent:train/valid[13,20,final]=(-0.929,-0.711,-0.645/-1.16,-1.04,-0.992) logprob:train/valid[13,20,final]=(-0.029,-0.016,-0.013/-0.051,-0.047,-0.045)

# cat exp/chain/cnn_chainali_1a/decode_test/scoring_kaldi/best_*
# %WER 3.94 [ 2600 / 65921, 549 ins, 837 del, 1214 sub ] exp/chain/cnn_chainali_1a/decode_test/cer_15_0.0
# %WER 6.69 [ 1241 / 18542, 135 ins, 358 del, 748 sub ] exp/chain/cnn_chainali_1a/decode_test/wer_15_0.5

set -e -o pipefail

stage=0
@@ -28,7 +13,7 @@ gmm=tri3 # this is the source gmm-dir that we'll use for alignments; it
nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium.
affix=_1a #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration.
ali=tri3_ali
chain_model_dir=exp/chain${nnet3_affix}/cnn${affix}
chain_model_dir=exp/chain${nnet3_affix}/cnn_1a
common_egs_dir=
reporting_email=

@@ -90,15 +75,15 @@ if [ $stage -le 1 ]; then
# topo file. [note, it really has two states.. the first one is only repeated
# once, the second one has zero or more repeats.]
if [ -d $lang ]; then
if [ $lang/L.fst -nt data/$lang_test/L.fst ]; then
if [ $lang/L.fst -nt data/lang/L.fst ]; then
echo "$0: $lang already exists, not overwriting it; continuing"
else
echo "$0: $lang already exists and seems to be older than data/lang..."
echo " ... not sure what to do. Exiting."
exit 1;
fi
else
cp -r data/$lang_test $lang
cp -r data/lang $lang
silphonelist=$(cat $lang/phones/silence.csl) || exit 1;
nonsilphonelist=$(cat $lang/phones/nonsilence.csl) || exit 1;
# Use our special topology... note that later on may have to tune this
@@ -112,7 +97,7 @@ if [ $stage -le 2 ]; then
# use the same num-jobs as the alignments
steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \
--scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \
${train_data_dir} data/$lang_test $chain_model_dir $lat_dir
${train_data_dir} data/lang $chain_model_dir $lat_dir
cp $gmm_lat_dir/splice_opts $lat_dir/splice_opts
fi

8 changes: 4 additions & 4 deletions egs/iam/v1/local/chain/run_cnn_chainali_1b.sh
@@ -27,7 +27,7 @@ gmm=tri3 # this is the source gmm-dir that we'll use for alignments; it
nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium.
affix=_1b #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration.
ali=tri3_ali
chain_model_dir=exp/chain${nnet3_affix}/cnn${affix}
chain_model_dir=exp/chain${nnet3_affix}/cnn_1a
common_egs_dir=
reporting_email=

@@ -89,15 +89,15 @@ if [ $stage -le 1 ]; then
# topo file. [note, it really has two states.. the first one is only repeated
# once, the second one has zero or more repeats.]
if [ -d $lang ]; then
if [ $lang/L.fst -nt data/$lang_test/L.fst ]; then
if [ $lang/L.fst -nt data/lang/L.fst ]; then
echo "$0: $lang already exists, not overwriting it; continuing"
else
echo "$0: $lang already exists and seems to be older than data/lang..."
echo " ... not sure what to do. Exiting."
exit 1;
fi
else
cp -r data/$lang_test $lang
cp -r data/lang $lang
silphonelist=$(cat $lang/phones/silence.csl) || exit 1;
nonsilphonelist=$(cat $lang/phones/nonsilence.csl) || exit 1;
# Use our special topology... note that later on may have to tune this
@@ -111,7 +111,7 @@ if [ $stage -le 2 ]; then
# use the same num-jobs as the alignments
steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \
--scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \
${train_data_dir} data/$lang_test $chain_model_dir $lat_dir
${train_data_dir} data/lang $chain_model_dir $lat_dir
cp $gmm_lat_dir/splice_opts $lat_dir/splice_opts
fi

40 changes: 20 additions & 20 deletions egs/iam/v1/local/chain/run_cnn_chainali_1c.sh
@@ -25,15 +25,14 @@ gmm=tri3 # this is the source gmm-dir that we'll use for alignments; it
nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium.
affix=_1c #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration.
ali=tri3_ali
chain_model_dir=exp/chain${nnet3_affix}/cnn${affix}
chain_model_dir=exp/chain${nnet3_affix}/cnn_1a
common_egs_dir=
reporting_email=

# chain options
train_stage=-10
xent_regularize=0.1
frame_subsampling_factor=4
alignment_subsampling_factor=1
# training chunk-options
chunk_width=340,300,200,100
num_leaves=500
@@ -75,7 +74,6 @@ tree_dir=exp/chain${nnet3_affix}/tree_chain
# you should probably name it differently.
lang=data/lang_chain
for f in $train_data_dir/feats.scp \
$train_data_dir/feats.scp $gmm_dir/final.mdl \
$ali_dir/ali.1.gz $gmm_dir/final.mdl; do
[ ! -f $f ] && echo "$0: expected file $f to exist" && exit 1
done
@@ -87,15 +85,15 @@ if [ $stage -le 1 ]; then
# topo file. [note, it really has two states.. the first one is only repeated
# once, the second one has zero or more repeats.]
if [ -d $lang ]; then
if [ $lang/L.fst -nt data/$lang_test/L.fst ]; then
if [ $lang/L.fst -nt data/lang/L.fst ]; then
echo "$0: $lang already exists, not overwriting it; continuing"
else
echo "$0: $lang already exists and seems to be older than data/lang..."
echo " ... not sure what to do. Exiting."
exit 1;
fi
else
cp -r data/$lang_test $lang
cp -r data/lang $lang
silphonelist=$(cat $lang/phones/silence.csl) || exit 1;
nonsilphonelist=$(cat $lang/phones/nonsilence.csl) || exit 1;
# Use our special topology... note that later on may have to tune this
@@ -109,7 +107,7 @@ if [ $stage -le 2 ]; then
# use the same num-jobs as the alignments
steps/nnet3/align_lats.sh --nj $nj --cmd "$cmd" \
--scale-opts '--transition-scale=1.0 --self-loop-scale=1.0' \
${train_data_dir} data/$lang_test $chain_model_dir $lat_dir
${train_data_dir} data/lang $chain_model_dir $lat_dir
cp $gmm_lat_dir/splice_opts $lat_dir/splice_opts
fi

@@ -136,12 +134,12 @@ if [ $stage -le 4 ]; then

num_targets=$(tree-info $tree_dir/tree | grep num-pdfs | awk '{print $2}')
learning_rate_factor=$(echo "print 0.5/$xent_regularize" | python)
opts="l2-regularize=0.075"
opts_2="l2-regularize=0.075"
opts_3="l2-regularize=0.1"
common1="$opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36"
common2="$opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70"
common3="$opts required-time-offsets= height-offsets=-1,0,1 num-filters-out=70"
cnn_opts="l2-regularize=0.075"
tdnn_opts="l2-regularize=0.075"
output_opts="l2-regularize=0.1"
common1="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36"
common2="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70"
common3="$cnn_opts required-time-offsets= height-offsets=-1,0,1 num-filters-out=70"
mkdir -p $dir/configs
cat <<EOF > $dir/configs/network.xconfig
input dim=40 name=input
@@ -153,13 +151,13 @@ if [ $stage -le 4 ]; then
conv-relu-batchnorm-layer name=cnn5 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common2 height-subsample-out=2
conv-relu-batchnorm-layer name=cnn6 height-in=10 height-out=10 time-offsets=-1,0,1 $common3
conv-relu-batchnorm-layer name=cnn7 height-in=10 height-out=10 time-offsets=-1,0,1 $common3
relu-batchnorm-layer name=tdnn1 input=Append(-4,-2,0,2,4) dim=$tdnn_dim $opts_2
relu-batchnorm-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim $opts_2
relu-batchnorm-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim $opts_2
relu-batchnorm-layer name=tdnn1 input=Append(-4,-2,0,2,4) dim=$tdnn_dim $tdnn_opts
relu-batchnorm-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts
relu-batchnorm-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts

## adding the layers for chain branch
relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 $opts_2
output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 $opts_3
relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 $tdnn_opts
output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 $output_opts

# adding the layers for xent branch
# This block prints the configs for a separate output that will be
Expand All @@ -170,8 +168,8 @@ if [ $stage -le 4 ]; then
# final-layer learns at a rate independent of the regularization
# constant; and the 0.5 was tuned so as to make the relative progress
# similar in the xent and regular final layers.
relu-batchnorm-layer name=prefinal-xent input=tdnn3 dim=$tdnn_dim target-rms=0.5 $opts_2
output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 $opts_3
relu-batchnorm-layer name=prefinal-xent input=tdnn3 dim=$tdnn_dim target-rms=0.5 $tdnn_opts
output-layer name=output-xent dim=$num_targets learning-rate-factor=$learning_rate_factor max-change=1.5 $output_opts
EOF
steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs/
fi
@@ -192,7 +190,9 @@ if [ $stage -le 5 ]; then
--chain.apply-deriv-weights=false \
--chain.lm-opts="--num-extra-lm-states=500" \
--chain.frame-subsampling-factor=$frame_subsampling_factor \
--chain.alignment-subsampling-factor=$alignment_subsampling_factor \
--chain.alignment-subsampling-factor=1 \
--chain.left-tolerance 3 \
--chain.right-tolerance 3 \
--trainer.srand=$srand \
--trainer.max-param-change=2.0 \
--trainer.num-epochs=4 \
45 changes: 25 additions & 20 deletions egs/iam/v1/local/chain/run_flatstart_cnn1a.sh
@@ -5,16 +5,16 @@

# local/chain/compare_wer.sh exp/chain/cnn_1a exp/chain/cnn_chainali_1c exp/chain/e2e_cnn_1a
# System cnn_1a cnn_chainali_1c e2e_cnn_1a
# WER 18.58 12.84 15.46
# CER 10.17 6.40 7.21
# Final train prob -0.0122 -0.0120 -0.0426
# Final valid prob -0.0999 -0.0199 -0.0724
# WER 18.58 12.84 14.06
# CER 10.17 6.40 6.57
# Final train prob -0.0122 -0.0120 -0.0346
# Final valid prob -0.0999 -0.0199 -0.0594
# Final train prob (xent) -0.5652 -0.9973
# Final valid prob (xent) -0.9758 -1.1537
# Parameters 4.36M 3.96M 9.13M

# steps/info/chain_dir_info.pl exp/chain/e2e_cnn_1a/
# exp/chain/e2e_cnn_1a/: num-iters=21 nj=2..4 num-params=9.1M dim=40->12640 combine=-0.040->-0.040 (over 1) logprob:train/valid[13,20,final]=(-0.065,-0.046,-0.043/-0.081,-0.073,-0.072)
# steps/info/chain_dir_info.pl exp/chain/e2e_cnn_1a
# exp/chain/e2e_cnn_1a: num-iters=21 nj=2..4 num-params=9.1M dim=40->12640 combine=-0.033->-0.033 (over 1) logprob:train/valid[13,20,final]=(-0.058,-0.042,-0.035/-0.070,-0.064,-0.059)

set -e

@@ -34,8 +34,8 @@ common_egs_dir=
l2_regularize=0.00005
frames_per_iter=1000000
cmvn_opts="--norm-means=true --norm-vars=true"
train_set=train_e2e
lang_test=lang_test
train_set=train
lang_test=lang_unk

# End configuration section.
echo "$0 $@" # Print the command line for logging
@@ -74,19 +74,24 @@ if [ $stage -le 1 ]; then
--shared-phones true \
--type biphone \
data/$train_set $lang $treedir
cp exp/chain/e2e_base/phone_lm.fst $treedir/
$cmd $treedir/log/make_phone_lm.log \
cat data/$train_set/text \| \
steps/nnet3/chain/e2e/text_to_phones.py data/lang \| \
utils/sym2int.pl -f 2- data/lang/phones.txt \| \
chain-est-phone-lm --num-extra-lm-states=500 \
ark:- $treedir/phone_lm.fst
fi

if [ $stage -le 2 ]; then
echo "$0: creating neural net configs using the xconfig parser";
num_targets=$(tree-info $treedir/tree | grep num-pdfs | awk '{print $2}')

opts="l2-regularize=0.075"
opts_2="l2-regularize=0.075"
opts_3="l2-regularize=0.1"
common1="$opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36"
common2="$opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70"
common3="$opts required-time-offsets= height-offsets=-1,0,1 num-filters-out=70"
cnn_opts="l2-regularize=0.075"
tdnn_opts="l2-regularize=0.075"
output_opts="l2-regularize=0.1"
common1="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=36"
common2="$cnn_opts required-time-offsets= height-offsets=-2,-1,0,1,2 num-filters-out=70"
common3="$cnn_opts required-time-offsets= height-offsets=-1,0,1 num-filters-out=70"
mkdir -p $dir/configs
cat <<EOF > $dir/configs/network.xconfig
input dim=40 name=input
@@ -98,13 +103,13 @@ if [ $stage -le 2 ]; then
conv-relu-batchnorm-layer name=cnn5 height-in=20 height-out=10 time-offsets=-4,-2,0,2,4 $common2 height-subsample-out=2
conv-relu-batchnorm-layer name=cnn6 height-in=10 height-out=10 time-offsets=-1,0,1 $common3
conv-relu-batchnorm-layer name=cnn7 height-in=10 height-out=10 time-offsets=-1,0,1 $common3
relu-batchnorm-layer name=tdnn1 input=Append(-4,-2,0,2,4) dim=$tdnn_dim $opts_2
relu-batchnorm-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim $opts_2
relu-batchnorm-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim $opts_2
relu-batchnorm-layer name=tdnn1 input=Append(-4,-2,0,2,4) dim=$tdnn_dim $tdnn_opts
relu-batchnorm-layer name=tdnn2 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts
relu-batchnorm-layer name=tdnn3 input=Append(-4,0,4) dim=$tdnn_dim $tdnn_opts

## adding the layers for chain branch
relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 $opts_2
output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 $opts_3
relu-batchnorm-layer name=prefinal-chain dim=$tdnn_dim target-rms=0.5 $output_opts
output-layer name=output include-log-softmax=false dim=$num_targets max-change=1.5 $output_opts
EOF

steps/nnet3/xconfig_to_configs.py --xconfig-file $dir/configs/network.xconfig --config-dir $dir/configs
2 changes: 1 addition & 1 deletion egs/iam/v1/local/make_features.py
@@ -124,4 +124,4 @@ def get_scaled_image(im, allowed_lengths = None):
write_kaldi_matrix(out_fh, data, image_id)

print('Generated features for {} images. Failed for {} (iamge too '
'long).'.format(num_ok, num_fail))
'long).'.format(num_ok, num_fail), file=sys.stderr)
5 changes: 4 additions & 1 deletion egs/iam/v1/local/prepare_dict.sh
@@ -8,6 +8,9 @@

set -e
dir=data/local/dict
vocab_size=50000
. ./utils/parse_options.sh

mkdir -p $dir

# First get the set of all letters that occur in data/train/text
@@ -22,7 +25,7 @@ cat data/train/text | \

export letters=$(cat $dir/nonsilence_phones.txt | tr -d "\n")

cat data/local/local_lm/data/wordlist | \
head -n $vocab_size data/local/local_lm/data/word_count | awk '{print $2}' | \
perl -e '$letters=$ENV{letters};
while(<>){
chop;
7 changes: 0 additions & 7 deletions egs/iam/v1/local/train_lm.sh
@@ -108,7 +108,6 @@ if [ $stage -le 1 ]; then
${dir}/data/text ${order} ${lm_dir}/work ${unpruned_lm_dir}

get_data_prob.py ${dir}/data/real_dev_set.txt ${unpruned_lm_dir} 2>&1 | grep -F '[perplexity'
#log-prob: -5.05603614242 [perplexity = 156.967086371] over 19477.0 words
fi

if [ $stage -le 2 ]; then
@@ -118,9 +117,6 @@ if [ $stage -le 2 ]; then
prune_lm_dir.py --target-num-ngrams=$size --initial-threshold=0.02 ${unpruned_lm_dir} ${dir}/data/lm_${order}_prune_big

get_data_prob.py ${dir}/data/real_dev_set.txt ${dir}/data/lm_${order}_prune_big 2>&1 | grep -F '[perplexity'
# get_data_prob.py: log-prob of data/local/local_lm/data/real_dev_set.txt given model data/local/local_lm/data/lm_3_prune_big was -5.06654404785 per word [perplexity = 158.625177948] over 19477.0 words
# current results, after adding --limit-unk-history=true:


mkdir -p ${dir}/data/arpa
format_arpa_lm.py ${dir}/data/lm_${order}_prune_big | gzip -c > ${dir}/data/arpa/${order}gram_big.arpa.gz
@@ -134,9 +130,6 @@ if [ $stage -le 3 ]; then
prune_lm_dir.py --target-num-ngrams=$size ${dir}/data/lm_${order}_prune_big ${dir}/data/lm_${order}_prune_small

get_data_prob.py ${dir}/data/real_dev_set.txt ${dir}/data/lm_${order}_prune_small 2>&1 | grep -F '[perplexity'
# get_data_prob.py: log-prob of data/local/local_lm/data/real_dev_set.txt given model data/local/local_lm/data/lm_3_prune_small was -5.24719139498 per word [perplexity = 190.031793995] over 19477.0 words
# current results, after adding --limit-unk-history=true (needed for modeling OOVs and not blowing up LG.fst):


format_arpa_lm.py ${dir}/data/lm_${order}_prune_small | gzip -c > ${dir}/data/arpa/${order}gram_small.arpa.gz
fi