15 changes: 9 additions & 6 deletions egs/mini_librispeech/s5/local/nnet3/tuning/run_tdnn_lstm_1a.sh
@@ -3,18 +3,21 @@
# This is a basic TDNN+LSTM nnet3 experiment.


# steps/info/nnet3_dir_info.pl exp/nnet3/tdnn_lstm1a_sp
# exp/nnet3/tdnn_lstm1a_sp: num-iters=32 nj=1..2 num-params=8.4M dim=40+100->2022 combine=-0.49->-0.39 loglike:train/valid[20,31,combined]=(-0.65,-0.41,-0.39/-1.03,-0.96,-0.97) accuracy:train/valid[20,31,combined]=(0.78,0.86,0.87/0.70,0.72,0.72)

# Below, comparing with the chain TDNN system.  The TDNN+LSTM is a little
# better with the small (tgsmall) LM decoding.  Both systems are probably
# super-badly tuned, and the chain system probably used too many jobs.
#
# local/nnet3/compare_wer.sh exp/chain/tdnn1a_sp exp/nnet3/tdnn_lstm1a_sp
# System tdnn1a_sp tdnn_lstm1a_sp
#WER dev_clean_2 (tgsmall) 18.58 17.67
#WER dev_clean_2 (tglarge) 13.35 13.43
# Final train prob -0.3660
# Final valid prob -1.0236
# Final train acc 0.8737
# Final valid acc 0.7222
#WER dev_clean_2 (tgsmall) 18.43 17.37
#WER dev_clean_2 (tglarge) 13.15 13.43
# Final train prob -0.3933
# Final valid prob -0.9662
# Final train acc 0.8652
# Final valid acc 0.7206

# Set -e here so that we catch it immediately if any executable fails
set -euo pipefail
278 changes: 99 additions & 179 deletions egs/wsj/s5/steps/libs/common.py
@@ -2,6 +2,7 @@

# Copyright 2016 Vijayaditya Peddinti.
# 2016 Vimal Manohar
# 2017 Johns Hopkins University (author: Daniel Povey)
# Apache 2.0

""" This module contains several utility functions and classes that are
@@ -74,187 +75,107 @@ def check_if_cuda_compiled():
return True


class KaldiCommandException(Exception):
""" An Exception class whose message contains the kaldi command that
caused the error and any error output that was captured.
"""
def __init__(self, command, err=None):
Exception.__init__(self,
"There was an error while running the command "
"{0}\n{1}\n{2}".format(command, "-"*10,
"" if err is None else err))


class BackgroundProcessHandler():
""" This class handles background processes to ensure that a top-level
script waits until all the processes end before exiting

A top-level script is expected to instantiate an object of this class
and pass it to all calls of run_kaldi_command that are to be run in the
background. The background processes are queued and polled in a
parallel thread at a set interval to check for failures.
The top-level script can, at the end, ensure that all processes are
completed before exiting.

Attributes:
__process_queue: Stores a list of process handles and command tuples
__polling_time: The time after which the processes are polled
__timer: Internal timer object
__is_running: Stores whether a timer is running
"""

def __init__(self, polling_time=600):
self.__process_queue = []
self.__polling_time = polling_time
self.__timer = None
self.__lock = threading.Lock()
self.__is_running = False

def __run(self):
""" Internal function to run a poll. Calls poll(). """
assert(self.__is_running)
self.__is_running = False
logger.debug("Polling...")
if self.poll():
# If there are any more background processes running,
# start a new timer
self.start()

def start(self):
""" Start the background process handler.

Repeatedly calls itself through the __run() method every
__polling_time seconds.
"""
if not self.__is_running:
self.__timer = threading.Timer(self.__polling_time, self.__run)
logger.debug("Starting new timer...")
self.__is_running = True
self.__timer.start()

def stop(self):
""" Stop the background process handler by cancelling any running timer.
"""
if self.__timer is not None:
self.__timer.cancel()
self.__is_running = False

def poll(self):
""" Poll background processes and check their statuses.

Returns True if any processes are still in the queue.
"""
with self.__lock:
remaining_processes = []
for t in self.__process_queue:
if self.is_process_done(t):
self.ensure_process_is_done(t)
else:
remaining_processes.append(t)
self.__process_queue = remaining_processes
num_processes = len(self.__process_queue)
logger.debug("Number of processes remaining is {0}...".format(
num_processes))
return (num_processes > 0)

def add_process(self, t):
""" Add a (process handle, command) tuple to the queue.
"""
with self.__lock:
self.__process_queue.append(t)
self.start()

def is_process_done(self, t):
p, command = t
if p.poll() is None:
return False
return True
def execute_command(command):
""" Runs a kaldi job in the foreground and waits for it to complete; raises an
exception if its return status is nonzero. The command is executed in
'shell' mode so 'command' can involve things like pipes. Often,
'command' will start with 'run.pl' or 'queue.pl'. The stdout and stderr
are merged with the calling process's stdout and stderr so they will
appear on the screen.

def ensure_process_is_done(self, t):
p, command = t
logger.debug("Waiting for process '{0}' to end".format(command))
[stdout, stderr] = p.communicate()
if p.returncode is not 0:
raise KaldiCommandException(command, stderr)

def ensure_processes_are_done(self):
self.__process_queue.reverse()
while len(self.__process_queue) > 0:
t = self.__process_queue.pop()
self.ensure_process_is_done(t)
self.stop()

def __del__(self):
self.stop()

def debug(self):
for p, command in self.__process_queue:
logger.info("Process '{0}' is running".format(command))


def run_job(command, wait=True, background_process_handler=None):
""" Runs a kaldi job, usually using a script such as queue.pl and
run.pl, and redirects the stdout and stderr to the parent
process's streams.
These are usually a sequence of commands connected by pipes, so we use
shell=True.

Args:
background_process_handler: An object of the BackgroundProcessHandler
class that is instantiated by the top-level script. If this is
provided, then the created process handle is added to the object.
wait: If True, wait until the process is completed. However, if the
background_process_handler is provided, this option will be
ignored and the process will be run in the background.
See also: get_command_stdout, background_command
"""
p = subprocess.Popen(command, shell=True)
p.communicate()
if p.returncode is not 0:
raise Exception("Command exited with status {0}: {1}".format(
p.returncode, command))

if background_process_handler is not None:
wait = False
background_process_handler.add_process((p, command))

if wait:
p.communicate()
if p.returncode is not 0:
raise KaldiCommandException(command)
return None
else:
return p
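
A minimal usage sketch for execute_command(); the import path and the run.pl command line below are assumptions for illustration only.

import libs.common as common_lib

# Run a run.pl/queue.pl style job in the foreground; a nonzero exit status
# raises an Exception, and the job's stdout/stderr appear on the screen.
common_lib.execute_command(
    "run.pl exp/foo/log/example.log "
    "echo hello from a kaldi job")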


def run_kaldi_command(command, wait=True, background_process_handler=None):
""" Runs commands frequently seen in Kaldi scripts and
captures the stdout and stderr.
These are usually a sequence of commands connected by pipes, so we use
shell=True.

Args:
background_process_handler: An object of the BackgroundProcessHandler
class that is instantiated by the top-level script. If this is
provided, then the created process handle is added to the object.
wait: If True, wait until the process is completed. However, if the
background_process_handler is provided, this option will be
ignored and the process will be run in the background.
def get_command_stdout(command, require_zero_status = True):
""" Executes a command and returns its stdout output as a string. The
command is executed with shell=True, so it may contain pipes and
other shell constructs.

If require_zero_status is True, this function will raise an exception if
the command has nonzero exit status. If False, it just prints a warning
if the exit status is nonzero.

See also: execute_command, background_command
"""
p = subprocess.Popen(command, shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE)

if background_process_handler is not None:
wait = False
background_process_handler.add_process((p, command))

if wait:
[stdout, stderr] = p.communicate()
if p.returncode is not 0:
raise KaldiCommandException(command, stderr)
return stdout, stderr
else:
return p
stdout=subprocess.PIPE)

stdout = p.communicate()[0]
if p.returncode is not 0:
str = "Command exited with status {0}: {1}".format(
p.returncode, command)
if require_zero_status:
raise Exception(str)
else:
logger.warning(str)
return stdout
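
A small sketch of how get_command_stdout() can be used to capture and parse a command's output; the data path is hypothetical, and the pattern mirrors the helper functions further down in this file.

import libs.common as common_lib

# Capture the output of a shell pipeline and parse it.
stdout_val = common_lib.get_command_stdout(
    "feat-to-dim --print-args=false scp:data/train/feats.scp -")
feat_dim = int(stdout_val)

# With require_zero_status=False a failing command only logs a warning and
# whatever stdout it produced is still returned.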




def wait_for_background_commands():
""" This waits for all threads to exit. You will often want to
run this at the end of programs that have launched background
threads, so that the program will wait for its child processes
to terminate before it dies."""
for t in threading.enumerate():
if not t == threading.current_thread():
t.join()

def background_command(command, require_zero_status = False):
"""Executes a command in a separate thread, like running with '&' in the shell.
If you want the program to die if the command eventually returns with
nonzero status, then set require_zero_status to True. 'command' will be
executed in 'shell' mode, so it's OK for it to contain pipes and other
shell constructs.

This function returns the Thread object created, just in case you want
to wait for that specific command to finish. For example, you could do:
thread = background_command('foo | bar')
# do something else while waiting for it to finish
thread.join()

See also:
- wait_for_background_commands(), which can be used
at the end of the program to wait for all these commands to terminate.
- execute_command() and get_command_stdout(), which allow you to
execute commands in the foreground.

"""

thread = threading.Thread(target=background_command_runner,
args=(command, require_zero_status))
thread.start()
return thread
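
A sketch of how background_command() and wait_for_background_commands() are meant to work together; the log paths and commands are made up for illustration.

import libs.common as common_lib

# Launch a few jobs without blocking; with require_zero_status=True a
# failing job terminates the whole program.
for job in [1, 2, 3]:
    common_lib.background_command(
        "run.pl exp/foo/log/diag.{0}.log "
        "echo running diagnostic job {0}".format(job),
        require_zero_status=True)

# ... other work in the main thread ...

# Block until every thread started by background_command() has finished.
common_lib.wait_for_background_commands()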


def background_command_runner(command, require_zero_status):
""" This is the function that is called from background_command, in
a separate thread."""
p = subprocess.Popen(command, shell=True)

p.communicate()
if p.returncode is not 0:
str = "Command exited with status {0}: {1}".format(
p.returncode, command)
if require_zero_status:
logger.error(str)
# thread.interrupt_main() sends a KeyboardInterrupt to the main
# thread, which will generally terminate the program.
import thread
thread.interrupt_main()
else:
logger.warning(str)


def get_number_of_leaves_from_tree(alidir):
[stdout, stderr] = run_kaldi_command(
stdout = get_command_stdout(
"tree-info {0}/tree 2>/dev/null | grep num-pdfs".format(alidir))
parts = stdout.split()
assert(parts[0] == "num-pdfs")
@@ -265,7 +186,7 @@ def get_number_of_leaves_from_tree(alidir):


def get_number_of_leaves_from_model(dir):
[stdout, stderr] = run_kaldi_command(
stdout = get_command_stdout(
"am-info {0}/final.mdl 2>/dev/null | grep -w pdfs".format(dir))
parts = stdout.split()
# number of pdfs 7115
@@ -288,7 +209,7 @@ def get_number_of_jobs(alidir):
def get_ivector_dim(ivector_dir=None):
if ivector_dir is None:
return 0
[stdout_val, stderr_val] = run_kaldi_command(
stdout_val = get_command_stdout(
"feat-to-dim --print-args=false "
"scp:{dir}/ivector_online.scp -".format(dir=ivector_dir))
ivector_dim = int(stdout_val)
@@ -297,7 +218,7 @@ def get_ivector_extractor_id(ivector_dir=None):
def get_ivector_extractor_id(ivector_dir=None):
if ivector_dir is None:
return None
[stdout_val, stderr_val] = run_kaldi_command(
stdout_val = get_command_stdout(
"steps/nnet2/get_ivector_id.sh {dir}".format(dir=ivector_dir))

if (stdout_val.strip() == "") or (stdout_val is None):
@@ -308,25 +229,24 @@ def get_feat_dim(feat_dir):
def get_feat_dim(feat_dir):
if feat_dir is None:
return 0
[stdout_val, stderr_val] = run_kaldi_command(
stdout_val = get_command_stdout(
"feat-to-dim --print-args=false "
"scp:{data}/feats.scp -".format(data=feat_dir))
feat_dim = int(stdout_val)
return feat_dim


def get_feat_dim_from_scp(feat_scp):
[stdout_val, stderr_val] = run_kaldi_command(
stdout_val = get_command_stdout(
"feat-to-dim --print-args=false "
"scp:{feat_scp} -".format(feat_scp=feat_scp))
feat_dim = int(stdout_val)
return feat_dim


def split_data(data, num_jobs):
run_kaldi_command("utils/split_data.sh {data} {num_jobs}".format(
data=data,
num_jobs=num_jobs))
execute_command("utils/split_data.sh {data} {num_jobs}".format(
data=data, num_jobs=num_jobs))


def read_kaldi_matrix(matrix_file):