chenzhehuai · chenzhehuai · Aug 3, 2018 · Apr 3, 2018 · Apr 3, 2018 · Apr 3, 2018
diff --git a/.gitignore b/.gitignore
@@ -101,6 +101,8 @@ GSYMS
 /tools/openfst-1.6.2/
 /tools/openfst-1.6.5.tar.gz
 /tools/openfst-1.6.5/
+/tools/openfst-1.6.7.tar.gz
+/tools/openfst-1.6.7/
 /tools/BeamformIt/
 /tools/libsndfile-1.0.25.tar.gz
 /tools/libsndfile-1.0.25/

diff --git a/.travis.yml b/.travis.yml
@@ -21,6 +21,7 @@ addons:
       - gfortran-4.9
       - liblapack-dev
       - clang-3.8
+      - sox
 
 branches:
   only:
@@ -47,7 +48,7 @@ script:
   # http://peter.eisentraut.org/blog/2014/12/01/ccache-and-clang-part-3/
   # for the explanation why extra switches needed for clang with ccache.
   - CXX="ccache clang++-3.8 -Qunused-arguments -fcolor-diagnostics -Wno-tautological-compare"
-    CFLAGS="-march=native"
+    CFLAGS=""
     LDFLAGS="-llapack"
     INCDIRS="$XROOT/usr/include"
     LIBDIRS="$XROOT/usr/lib"

diff --git a/COPYING b/COPYING
@@ -56,7 +56,7 @@ contributors and original source material as well as the full text of the Apache
 License v 2.0 are set forth below.
 
 Individual Contributors (in alphabetical order)
-      
+
       Mohit Agarwal
       Tanel Alumae
       Gilles Boulianne
@@ -123,7 +123,7 @@ Individual Contributors (in alphabetical order)
       Haihua Xu
       Hainan Xu
       Xiaohui Zhang
-      
+
 Other Source Material
 
     This project includes a port and modification of materials from JAMA: A Java
@@ -136,9 +136,9 @@ Other Source Material
   "Signal processing with lapped transforms," Artech House, Inc., 1992.  The
   current copyright holder, Henrique S. Malvar, has given his permission for the
   release of this modified version under the Apache License 2.0.
-  
-  This project includes material from the OpenFST Library v1.2.7 available at 
-  http://www.openfst.org and released under the Apache License v. 2.0.   
+
+  This project includes material from the OpenFST Library v1.2.7 available at
+  http://www.openfst.org and released under the Apache License v. 2.0.
 
   [OpenFst COPYING file begins here]
 
@@ -147,7 +147,7 @@ Other Source Material
     You may obtain a copy of the License at
 
        http://www.apache.org/licenses/LICENSE-2.0
- 
+
     Unless required by applicable law or agreed to in writing, software
     distributed under the License is distributed on an "AS IS" BASIS,
     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.

diff --git a/README.md b/README.md
@@ -1,5 +1,4 @@
-[![Build Status](https://travis-ci.org/kaldi-asr/kaldi.svg?branch=master)](https://travis-ci.org/kaldi-asr/kaldi)
-
+[![Build Status](https://travis-ci.com/kaldi-asr/kaldi.svg?branch=master)](https://travis-ci.com/kaldi-asr/kaldi)
 Kaldi Speech Recognition Toolkit
 ================================
 

diff --git a/egs/aishell/s5/local/aishell_train_lms.sh b/egs/aishell/s5/local/aishell_train_lms.sh
@@ -23,7 +23,7 @@ kaldi_lm=`which train_lm.sh`
 if [ -z $kaldi_lm ]; then
   echo "$0: train_lm.sh is not found. That might mean it's not installed"
   echo "$0: or it is not added to PATH"
-  echo "$0: Use the script tools/extra/install_kaldi_lm.sh to install it"
+  echo "$0: Use the script tools/extras/install_kaldi_lm.sh to install it"
   exit 1
 fi
 

diff --git a/egs/aishell2/README.txt b/egs/aishell2/README.txt
@@ -0,0 +1,50 @@
+# AISHELL-2
+
+AISHELL-2 is by far the largest free speech corpus available for Mandarin ASR research.
+## 1. DATA
+### training data
+* 1000 hours of speech data (around 1 million utterances)
+* 1991 speakers (845 male and 1146 female)
+* clean recording environment (studio or quiet living room)
+* read speech
+* reading prompts from various domains: entertainment, finance, technology, sports, control commands, places of interest, etc.
+* near-field recording via 3 parallel channels (iOS, Android, Microphone).
+* iOS data is free for non-commercial research and education use (e.g. universities and colleges)
+
+### evaluation data:
+Currently we release AISHELL2-2018A-EVAL, containing:
+* dev: 2500 utterances from 5 speakers
+* test: 5000 utterances from 10 speakers
+
+You can download the above evaluation set from:
+http://www.aishelltech.com/aishell_eval
+
+We may update and release other evaluation sets on the website later, targeting different applications and scenarios.
+
+## 2. RECIPE
+Based on the standard Kaldi system, AISHELL-2 provides a self-contained Mandarin ASR recipe, with:
+* a word segmentation module, which is a must-have component for Chinese ASR systems
+* an open-source Mandarin lexicon (DaCiDian)
+* a simplified GMM training recipe
+* an acoustic channel adaptation recipe (AM fine-tuning)
+
+# CONTACT
+The AISHELL foundation is a non-profit online organization, with members from the speech industry and research institutes.
+
+We hope AISHELL-2 corpus and recipe could be beneficial to the entire speech community.
+
+Depending on your location and internet speed, we distribute the corpus in one of two ways:
+* hard-disk delivery
+* cloud-disk downloading
+
+To apply for the AISHELL-2 corpus for free, you need to fill in a very simple application form, providing or confirming the following:
+* university department / education institute info
+* only for non-commercial research / education use
+
+The AISHELL foundation covers all data distribution fees (including the corpus, hard-disk cost, etc.).
+
+Data re-distribution inside your university department is OK for convenience. However, users are not supposed to re-distribute AISHELL-2 to other universities or education institutes.
+
+To get the application form, or if you come across any problem with the recipe, contact us via:
+
+aishell.foundation@gmail.com
diff --git a/egs/aishell2/s5/RESULTS b/egs/aishell2/s5/RESULTS
@@ -0,0 +1,6 @@
+%WER 44.78 [ 22176 / 49527, 370 ins, 2179 del, 19627 sub ] exp/mono/decode_test/cer_9_0.0
+%WER 24.78 [ 12271 / 49527, 394 ins, 815 del, 11062 sub ] exp/tri1/decode_test/cer_11_0.0
+%WER 22.54 [ 11165 / 49527, 390 ins, 665 del, 10110 sub ] exp/tri2/decode_test/cer_11_0.0
+%WER 19.78 [ 9795 / 49527, 313 ins, 684 del, 8798 sub ] exp/tri3/decode_test/cer_13_0.0
+
+# (Chain model results are at the beginning of corresponding scripts)
diff --git a/egs/aishell2/s5/cmd.sh b/egs/aishell2/s5/cmd.sh
@@ -0,0 +1,15 @@
+# you can change cmd.sh depending on what type of queue you are using.
+# If you have no queueing system and want to run on a local machine, you
+# can change all instances of 'queue.pl' to run.pl (but be careful and run
+# commands one by one: most recipes will exhaust the memory on your
+# machine).  queue.pl works with GridEngine (qsub).  slurm.pl works
+# with slurm.  Different queues are configured differently, with different
+# queue names and different ways of specifying things like memory;
+# to account for these differences you can create and edit the file
+# conf/queue.conf to match your queue's configuration.  Search for
+# conf/queue.conf in http://kaldi-asr.org/doc/queue.html for more information,
+# or search for the string 'default_config' in utils/queue.pl or utils/slurm.pl.
+
+export train_cmd="queue.pl --mem 2G"
+export decode_cmd="queue.pl --mem 4G"
+export mkgraph_cmd="queue.pl --mem 8G"
diff --git a/egs/aishell2/s5/conf/decode.conf b/egs/aishell2/s5/conf/decode.conf
@@ -0,0 +1,3 @@
+beam=11.0 # beam for decoding.  Was 13.0 in the scripts.
+first_beam=8.0 # beam for 1st-pass decoding in SAT.
+
diff --git a/egs/aishell2/s5/conf/mfcc.conf b/egs/aishell2/s5/conf/mfcc.conf
@@ -0,0 +1,2 @@
+--use-energy=false
+--sample-frequency=16000
diff --git a/egs/aishell2/s5/conf/mfcc_hires.conf b/egs/aishell2/s5/conf/mfcc_hires.conf
@@ -0,0 +1,10 @@
+# config for high-resolution MFCC features, intended for neural network training.
+# Note: we keep all cepstra, so it has the same info as filterbank features,
+# but MFCC is more easily compressible (because less correlated) which is why
+# we prefer this method.
+--use-energy=false       # use average of log energy, not energy.
+--sample-frequency=16000 # AISHELL-2 is sampled at 16kHz
+--num-mel-bins=40        # similar to Google's setup.
+--num-ceps=40            # there is no dimensionality reduction.
+--low-freq=20            # low cutoff frequency for mel bins
+--high-freq=-400         # high cutoff frequency, relative to Nyquist of 8000 (=7600)
diff --git a/egs/aishell2/s5/conf/pitch.conf b/egs/aishell2/s5/conf/pitch.conf
@@ -0,0 +1 @@
+--sample-frequency=16000
diff --git a/egs/aishell2/s5/local/chain/compare_wer.sh b/egs/aishell2/s5/local/chain/compare_wer.sh
@@ -0,0 +1,80 @@
+#!/bin/bash
+
+# Copyright 2018  Emotech LTD (Author: Xuechen LIU)
+# Apache 2.0
+
+# Compare WER between different models in the aishell2 chain directory.
+
+set -e
+. ./cmd.sh
+. ./path.sh
+
+if [ $# == 0 ]; then
+  echo "Usage: $0: <dir1> [<dir2> ... ]"
+  echo "e.g.: $0 exp/chain/tdnn_7h_sp"
+  exit 1
+fi
+
+echo "# $0 $*"
+
+set_names() {
+  if [ $# != 1 ]; then
+    echo "compare_wer_general.sh: internal error"
+    exit 1  # exit the program
+  fi
+  dirname=$(echo $1 | cut -d: -f1)
+}
+
+# print model names
+echo -n "# Model               "
+for x in $*; do
+  printf "% 10s" " $(basename $x)"
+done
+echo
+
+# print number of parameters
+echo -n "# Num. of params        "
+for x in $*; do
+  set_names $x
+  params=$(steps/info/chain_dir_info.pl "$x" | grep -o 'num-params=[0-9]*\.[0-9]*M' | cut -d'=' -f2-)
+  printf "% 10s\n" $params
+done
+
+# print decode WER results
+echo -n "# WER(%)               "
+for x in $*; do
+  set_names $x
+  wer=$([ -d $x ] && grep WER $x/decode_test/cer_* | utils/best_wer.sh | awk '{print $2}')
+  printf "% 10s" $wer
+done
+echo
+
+# print final log prob for train & validation
+echo -n "# Final train prob     "
+for x in $*; do
+  prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -v xent | awk '{printf($8)}' | cut -c1-7)
+  printf "% 10s" $prob
+done
+echo
+
+echo -n "# Final valid prob     "
+for x in $*; do
+  prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -v xent | awk '{printf($8)}' | cut -c1-7)
+  printf "% 10s" $prob
+done
+echo
+
+# do the same for xent objective
+echo -n "# Final train prob (xent)"
+for x in $*; do
+  prob=$(grep Overall $x/log/compute_prob_train.final.log | grep -w xent | awk '{printf("%.4f", $8)}')
+  printf "% 10s" $prob
+done
+echo
+
+echo -n "# Final valid prob (xent)"
+for x in $*; do
+  prob=$(grep Overall $x/log/compute_prob_valid.final.log | grep -w xent | awk '{printf("%.4f", $8)}')
+  printf "% 10s" $prob
+done
+echo
diff --git a/egs/aishell2/s5/local/chain/run_tdnn.sh b/egs/aishell2/s5/local/chain/run_tdnn.sh
@@ -0,0 +1 @@
+tuning/run_tdnn_1b.sh
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1,3 @@
		beam=11.0 # beam for decoding. Was 13.0 in the scripts.
		first_beam=8.0 # beam for 1st-pass decoding in SAT.