kaldi-asr · danpovey · Nov 26, 2018 · Aug 31, 2018 · Sep 13, 2018 · Sep 24, 2018
diff --git a/egs/reverb/s5/RESULTS b/egs/reverb/s5/RESULTS
diff --git a/egs/reverb/s5/conf/decode_dnn.config b/egs/reverb/s5/conf/decode_dnn.config
diff --git a/egs/reverb/s5/conf/fbank.conf b/egs/reverb/s5/conf/fbank.conf
diff --git a/egs/reverb/s5/conf/mfcc_hires.conf b/egs/reverb/s5/conf/mfcc_hires.conf
@@ -0,0 +1,10 @@
+# config for high-resolution MFCC features, intended for neural network training.
+# Note: we keep all cepstra, so it has the same info as filterbank features,
+# but MFCC is more easily compressible (because less correlated) which is why
+# we prefer this method.
+--use-energy=false   # use average of log energy, not energy.
+--sample-frequency=16000 
+--num-mel-bins=40
+--num-ceps=40
+--low-freq=40
+--high-freq=-400
diff --git a/egs/reverb/s5/conf/online_cmvn.conf b/egs/reverb/s5/conf/online_cmvn.conf
@@ -0,0 +1 @@
+# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh
diff --git a/egs/reverb/s5/conf/reverb_beamformit.cfg b/egs/reverb/s5/conf/reverb_beamformit.cfg
@@ -0,0 +1,50 @@
+#BeamformIt sample configuration file for AMI data (http://groups.inf.ed.ac.uk/ami/download/)
+
+# scrolling size to compute the delays
+scroll_size = 250
+
+# cross correlation computation window size
+window_size = 500
+
+#amount of maximum points for the xcorrelation taken into account
+nbest_amount = 4
+
+#flag whether to apply automatic noise thresholding
+do_noise_threshold = 1
+
+#Percentage of frames with lower xcorr taken as noisy
+noise_percent = 10
+
+######## acoustic modelling parameters
+
+#transition probabilities weight for multichannel decoding
+trans_weight_multi = 25
+trans_weight_nbest = 25
+
+###
+
+#flag whether to print the features after setting them, or not
+print_features = 1
+
+#flag whether to use the bad frames in the sum process
+do_avoid_bad_frames = 1
+
+#flag to use the best channel (SNR) as a reference
+#defined from command line
+do_compute_reference = 1
+
+#flag whether to use a uem file or not (process the whole file)
+do_use_uem_file = 0
+
+#flag whether to use an adaptive weights scheme or fixed weights
+do_adapt_weights = 1
+
+#flag whether to output the sph files or just run the system to create the auxiliary files
+do_write_sph_files = 1
+
+####directories where to store/retrieve info####
+#channels_file = ./cfg-files/channels
+
+#show needs to be passed as argument normally, here a default one is given just in case
+#show_id = Ttmp
+
diff --git a/egs/reverb/s5/local/Generate_mcTrainData_cut.m b/egs/reverb/s5/local/Generate_mcTrainData_cut.m
@@ -1,13 +1,13 @@
 function Generate_mcTrainData_cut(WSJ_dir_name, save_dir)
 %
 % Input variables:
-%    WSJ_dir_name: string name of user's clean wsjcam0 corpus directory 
-%                  (*Directory structure for wsjcam0 corpushas to be kept as it is after obtaining it from LDC. 
+%    WSJ_dir_name: string name of WAV file directory converted from original wsjcam0 SPHERE files
+%                  (*Directory structure for wsjcam0 corpus has to be kept as it is after obtaining it from LDC.
 %                    Otherwise this script does not work.)
 %
 % This function generates multi-condition traiing data
 % based on the following items:
-%  1. wsjcam0 corpus (distributed from the LDC)
+%  1. wsjcam0 corpus (WAV files)
 %  2. room impulse responses (ones under ./RIR/)
 %  3. noise (ones under ./NOISE/).
 % Generated data has the same directory structure as original wsjcam0 corpus. 
@@ -26,8 +26,6 @@ function Generate_mcTrainData_cut(WSJ_dir_name, save_dir)
 
 display(['Name of directory for original wsjcam0: ',WSJ_dir_name])
 display(['Name of directory to save generated multi-condition training data: ',save_dir])
-unix(['chmod u+x sphere_to_wave.csh']);
-unix(['chmod u+x bin/*']);
 
 % Parameters related to acoustic conditions
 SNRdB=20;
@@ -89,7 +87,6 @@ function Generate_mcTrainData_cut(WSJ_dir_name, save_dir)
     save_dir_tr=[save_dir,'/data/mc_train/'];
 end
 mkdir([save_dir_tr]);
-%mkdir([save_dir,'/taskfiles/'])
 
 mic_idx=['A';'B';'C';'D';'E';'F';'G';'H'];
 prev_fname='dummy';
@@ -114,13 +111,12 @@ function Generate_mcTrainData_cut(WSJ_dir_name, save_dir)
         end
         prev_fname=fname(1:idx1(end));
 
-        % load (sphere format) speech signal 
-        x=read_sphere([WSJ_dir_name,'/data/', fname]);
-        x=x/(2^15);  % conversion from short-int to float
+        % load speech signal
+        x=audioread([WSJ_dir_name, '/data/', fname, '.wav'])';
 
         % load RIR and noise for "THIS" utterance
-        eval(['RIR=wavread(RIR_sim',num2str(rcount),');']);
-        eval(['NOISE=wavread([noise_sim',num2str(ceil(rcount/4)),',''_',num2str(ncount),'.wav'']);']);
+        eval(['RIR=audioread(RIR_sim',num2str(rcount),');']);
+        eval(['NOISE=audioread([noise_sim',num2str(ceil(rcount/4)),',''_',num2str(ncount),'.wav'']);']);
 
         % Generate 8ch noisy reverberant data        
         y=gen_obs(x,RIR,NOISE,SNRdB);
@@ -138,8 +134,9 @@ function Generate_mcTrainData_cut(WSJ_dir_name, save_dir)
         y=y/4; % common normalization to all the data to prevent clipping
                % denominator was decided experimentally
 
-        for ch=1:8 
-            eval(['wavwrite(y(:,',num2str(ch),'),16000,''',save_dir_tr fname,'_ch',num2str(ch),'.wav'');']);
+        for ch=1:8
+	    outfilename = [save_dir_tr, fname, '_ch', num2str(ch), '.wav'];
+            eval(['audiowrite(outfilename, y(:,',num2str(ch),'), 16000);']);
         end
 
         display(['sentence ',num2str(fcount),' (out of 7861) finished! (Multi-condition training data)'])

diff --git a/egs/reverb/s5/local/REVERB_create_mcdata.sh b/egs/reverb/s5/local/REVERB_create_mcdata.sh
diff --git a/egs/reverb/s5/local/REVERB_mcwsjav_data_prep.sh b/egs/reverb/s5/local/REVERB_mcwsjav_data_prep.sh
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		# configuration file for apply-cmvn-online, used in the script ../local/run_online_decoding.sh