NOTE(review): line breaks restored from a single-line paste of this patch.
Leading whitespace inside the hunks was lost in that paste and has been
reconstructed; apply with `git apply --ignore-whitespace` or `patch -l`
unless the indentation is confirmed against the target files.

NOTE(review): in process_data.py the `elif coin < 1` (test) branch still
writes the image entry to `train_image_fh` after this patch. Presumably a
test-side image handle was intended -- confirm against the full file.

diff --git a/egs/iam/v1/local/train_lm.sh b/egs/iam/v1/local/train_lm.sh
index 911f54c5439..3e8c838efdb 100755
--- a/egs/iam/v1/local/train_lm.sh
+++ b/egs/iam/v1/local/train_lm.sh
@@ -60,7 +60,7 @@ if [ $stage -le 0 ]; then
   # Using LOB and brown corpus.
   if [ ! -f data/local/lob-train-only.txt ]; then
     cat data/local/lobcorpus/0167/download/LOB_COCOA/lob.txt | \
-      local/remove_test_utterances_from_lob.py data/test/text.old data/val/text.old \
+      local/remove_test_utterances_from_lob.py data/test/text data/val/text \
       > data/local/lob-train-only.txt
   fi
   cat data/local/lob-train-only.txt > ${dir}/data/text/lob.txt
diff --git a/egs/uw3/v1/local/process_data.py b/egs/uw3/v1/local/process_data.py
index 3643c0aca89..23b8e5402cf 100755
--- a/egs/uw3/v1/local/process_data.py
+++ b/egs/uw3/v1/local/process_data.py
@@ -52,10 +52,10 @@
 # The dataset is randomly split train 95% and test 5%
 coin = random.randint(0, 20)
 if coin >= 1:
-    train_text_fh.write(utt_id + ' ' + text + '\n')
+    train_text_fh.write("{} {}\n".format(utt_id, text))
     train_utt2spk_fh.write("{} {}\n".format(utt_id, page_count))
-    train_image_fh.write("{} {}\n".format(utt_id, image_path)
+    train_image_fh.write("{} {}\n".format(utt_id, image_path))
 elif coin < 1:
     test_text_fh.write("{} {}\n".format(utt_id, text))
     test_utt2spk_fh.write("{} {}\n".format(utt_id, page_count))
-    train_image_fh.write("{} {}\n".format(utt_id, image_path)
+    train_image_fh.write("{} {}\n".format(utt_id, image_path))