diff --git a/tensor2tensor/data_generators/common_voice.py b/tensor2tensor/data_generators/common_voice.py index 41935dc40..1bc8fc126 100644 --- a/tensor2tensor/data_generators/common_voice.py +++ b/tensor2tensor/data_generators/common_voice.py @@ -132,8 +132,8 @@ def generator(self, ] corpus_tar.extractall(tmp_dir, members=members) - data_dir = os.path.join(tmp_dir, "cv_corpus_v1") - data_tuples = _collect_data(data_dir) + raw_data_dir = os.path.join(tmp_dir, "cv_corpus_v1") + data_tuples = _collect_data(raw_data_dir) encoders = self.feature_encoders(data_dir) audio_encoder = encoders["waveforms"] text_encoder = encoders["targets"] diff --git a/tensor2tensor/data_generators/librispeech.py b/tensor2tensor/data_generators/librispeech.py index a6b7cdabc..c19191513 100644 --- a/tensor2tensor/data_generators/librispeech.py +++ b/tensor2tensor/data_generators/librispeech.py @@ -134,8 +134,8 @@ def generator(self, data_dir, tmp_dir, datasets, members.append(f) corpus_tar.extractall(tmp_dir, members=members) - data_dir = os.path.join(tmp_dir, "LibriSpeech", subdir) - data_files = _collect_data(data_dir, "flac", "txt") + raw_data_dir = os.path.join(tmp_dir, "LibriSpeech", subdir) + data_files = _collect_data(raw_data_dir, "flac", "txt") data_pairs = data_files.values() encoders = self.feature_encoders(data_dir)