From 76aa8d933fbbf46917a9cdda9bba2c99868c9e92 Mon Sep 17 00:00:00 2001 From: David Snyder Date: Tue, 22 May 2018 00:29:52 -0400 Subject: [PATCH 1/3] [egs] egs/tedlium/s5_r2/run.sh downloads Tedlium LMs from openslr.org by default --- egs/tedlium/s5_r2/local/ted_download_lm.sh | 17 +++++++++++++++++ egs/tedlium/s5_r2/run.sh | 8 +++++--- 2 files changed, 22 insertions(+), 3 deletions(-) create mode 100755 egs/tedlium/s5_r2/local/ted_download_lm.sh diff --git a/egs/tedlium/s5_r2/local/ted_download_lm.sh b/egs/tedlium/s5_r2/local/ted_download_lm.sh new file mode 100755 index 00000000000..11ee5bc82d6 --- /dev/null +++ b/egs/tedlium/s5_r2/local/ted_download_lm.sh @@ -0,0 +1,17 @@ +#!/bin/bash +# +# Copyright 2018 David Snyder +# Apache 2.0 +# +# This script downloads pre-built language models trained on the Cantab-Tedlium +# text data and Tedlium acoustic training data. If you want to build these +# models yourself, run the script local/ted_train_lm.sh. + +set -e + +echo "$0: downloading Tedlium 4 gram language models (it won't re-download if it was already downloaded.)" +wget --continue http://kaldi-asr.org/models/5/4gram_small.arpa.gz -P data/local/local_lm/data/arpa || exit 1 +wget --continue http://kaldi-asr.org/models/5/4gram_big.arpa.gz -P data/local/local_lm/data/arpa || exit 1 + +exit 0 + diff --git a/egs/tedlium/s5_r2/run.sh b/egs/tedlium/s5_r2/run.sh index 161c416d1f9..541c9ba887b 100755 --- a/egs/tedlium/s5_r2/run.sh +++ b/egs/tedlium/s5_r2/run.sh @@ -57,10 +57,12 @@ if [ $stage -le 3 ]; then fi if [ $stage -le 4 ]; then - # later on we'll change this script so you have the option to - # download the pre-built LMs from openslr.org instead of building them + # Download the pre-built LMs from openslr.org instead of building them # locally. - local/ted_train_lm.sh + local/ted_download_lm.sh + # Uncomment this script to build the language models instead of + # downloading them from openslr.org. + # local/ted_train_lm.sh fi if [ $stage -le 5 ]; then From 4f09bdd9a2285681423fd805ac58b9bbd7484d1d Mon Sep 17 00:00:00 2001 From: David Snyder Date: Tue, 22 May 2018 00:34:32 -0400 Subject: [PATCH 2/3] [egs] fixing typo --- egs/tedlium/s5_r2/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/tedlium/s5_r2/run.sh b/egs/tedlium/s5_r2/run.sh index 541c9ba887b..175c9cfb710 100755 --- a/egs/tedlium/s5_r2/run.sh +++ b/egs/tedlium/s5_r2/run.sh @@ -57,7 +57,7 @@ if [ $stage -le 3 ]; then fi if [ $stage -le 4 ]; then - # Download the pre-built LMs from openslr.org instead of building them + # Download the pre-built LMs from kaldi-asr.org instead of building them # locally. local/ted_download_lm.sh # Uncomment this script to build the language models instead of From 10f9d4e1d5e67847f73047cc6ca1acc865051e58 Mon Sep 17 00:00:00 2001 From: David Snyder Date: Tue, 22 May 2018 00:35:31 -0400 Subject: [PATCH 3/3] [egs] fixing typo --- egs/tedlium/s5_r2/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/egs/tedlium/s5_r2/run.sh b/egs/tedlium/s5_r2/run.sh index 175c9cfb710..e7b5df6055e 100755 --- a/egs/tedlium/s5_r2/run.sh +++ b/egs/tedlium/s5_r2/run.sh @@ -61,7 +61,7 @@ if [ $stage -le 4 ]; then # locally. local/ted_download_lm.sh # Uncomment this script to build the language models instead of - # downloading them from openslr.org. + # downloading them from kaldi-asr.org. # local/ted_train_lm.sh fi