diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_1a.sh b/egs/madcat_ar/v1/local/chain/run_cnn_1a.sh index e7f095243db..a3a98ce5ad5 100755 --- a/egs/madcat_ar/v1/local/chain/run_cnn_1a.sh +++ b/egs/madcat_ar/v1/local/chain/run_cnn_1a.sh @@ -173,7 +173,7 @@ if [ $stage -le 5 ]; then --chain.alignment-subsampling-factor=$frame_subsampling_factor \ --trainer.srand=$srand \ --trainer.max-param-change=2.0 \ - --trainer.num-epochs=2 \ + --trainer.num-epochs=4 \ --trainer.frames-per-iter=1000000 \ --trainer.optimization.num-jobs-initial=3 \ --trainer.optimization.num-jobs-final=16 \ diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh index 0f166860c23..b652eab034a 100755 --- a/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh +++ b/egs/madcat_ar/v1/local/chain/run_cnn_chainali_1a.sh @@ -9,7 +9,7 @@ train_set=train gmm=tri3 # this is the source gmm-dir that we'll use for alignments; it # should have alignments for the specified training data. nnet3_affix= # affix for exp dirs, e.g. it was _cleaned in tedlium. -affix=_1b #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. +affix=_1a #affix for TDNN+LSTM directory e.g. "1a" or "1b", in case we change the configuration. ali=tri3_ali chain_model_dir=exp/chain${nnet3_affix}/cnn_1a common_egs_dir= @@ -175,9 +175,9 @@ if [ $stage -le 5 ]; then --chain.alignment-subsampling-factor=1 \ --trainer.srand=$srand \ --trainer.max-param-change=2.0 \ - --trainer.num-epochs=2 \ + --trainer.num-epochs=4 \ --trainer.frames-per-iter=1000000 \ - --trainer.optimization.num-jobs-initial=8 \ + --trainer.optimization.num-jobs-initial=3 \ --trainer.optimization.num-jobs-final=16 \ --trainer.optimization.initial-effective-lrate=0.001 \ --trainer.optimization.final-effective-lrate=0.0001 \ diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1a.sh b/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1a.sh index d2649f70fe6..38387ce2fcc 100755 --- a/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1a.sh +++ b/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1a.sh @@ -180,7 +180,7 @@ if [ $stage -le 5 ]; then --trainer.max-param-change=2.0 \ --trainer.num-epochs=2 \ --trainer.frames-per-iter=1000000 \ - --trainer.optimization.num-jobs-initial=8 \ + --trainer.optimization.num-jobs-initial=3 \ --trainer.optimization.num-jobs-final=16 \ --trainer.optimization.initial-effective-lrate=0.001 \ --trainer.optimization.final-effective-lrate=0.0001 \ diff --git a/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1b.sh b/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1b.sh index 4be90978566..75c246f5ffe 100755 --- a/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1b.sh +++ b/egs/madcat_ar/v1/local/chain/run_cnn_e2eali_1b.sh @@ -1,6 +1,6 @@ #!/bin/bash -# e2eali_1a is the same as chainali_1c but uses the e2e chain model to get the +# e2eali_1b is the same as chainali_1a but uses the e2e chain model to get the # lattice alignments and to build a tree # local/chain/compare_wer.sh exp/chain/exp/chain/cnn_e2eali_1b @@ -192,9 +192,9 @@ if [ $stage -le 5 ]; then --chain.right-tolerance 3 \ --trainer.srand=$srand \ --trainer.max-param-change=2.0 \ - --trainer.num-epochs=2 \ + --trainer.num-epochs=4 \ --trainer.frames-per-iter=1000000 \ - --trainer.optimization.num-jobs-initial=8 \ + --trainer.optimization.num-jobs-initial=3 \ --trainer.optimization.num-jobs-final=16 \ --trainer.optimization.initial-effective-lrate=0.001 \ --trainer.optimization.final-effective-lrate=0.0001 \ diff --git a/egs/madcat_ar/v1/local/create_line_image_from_page_image.py b/egs/madcat_ar/v1/local/create_line_image_from_page_image.py index b4dfe5d57e9..92dbe6752f0 100755 --- a/egs/madcat_ar/v1/local/create_line_image_from_page_image.py +++ b/egs/madcat_ar/v1/local/create_line_image_from_page_image.py @@ -412,7 +412,7 @@ def set_line_image_data(image, line_id, image_file_name, image_fh): def get_line_images_from_page_image(image_file_name, madcat_file_path, image_fh): """ Given a page image, extracts the line images from it. - Inout + Input ----- image_file_name (string): complete path and name of the page image. madcat_file_path (string): complete path and name of the madcat xml file diff --git a/egs/madcat_ar/v1/run.sh b/egs/madcat_ar/v1/run.sh index b8513c0b32c..c98ecb60ef7 100755 --- a/egs/madcat_ar/v1/run.sh +++ b/egs/madcat_ar/v1/run.sh @@ -47,16 +47,13 @@ fi if [ $stage -le 2 ]; then echo "$0: Preparing data..." - local/prepare_data.sh --download_dir1 $download_dir1 \ - --download_dir2 $download_dir2 --download_dir3 $download_dir3 \ - --train_split_file $train_split_file --test_split_file $test_split_file \ - --dev_split_file $dev_split_file + local/prepare_data.sh fi mkdir -p data/{train,test,dev}/data if [ $stage -le 3 ]; then - for dataset in test train dev; do + for dataset in test train; do local/extract_features.sh --nj $nj --cmd $cmd --feat-dim 40 data/$dataset steps/compute_cmvn_stats.sh data/$dataset || exit 1; done