Skip to content

Commit

Permalink
Better mel feature extraction for better alignment
Browse files Browse the repository at this point in the history
Signed-off-by: begeekmyfriend <[email protected]>
  • Loading branch information
begeekmyfriend committed May 31, 2020
1 parent da23553 commit 55ff380
Show file tree
Hide file tree
Showing 3 changed files with 3 additions and 2 deletions.
1 change: 1 addition & 0 deletions common/preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,6 +93,7 @@ def _process_utterance(wav_dir, mel_dir, basename, wav_file, text, hparams):
# Write the spectrogram and audio to disk
filename = f'{basename}.npy'
np.save(os.path.join(wav_dir, filename), wav, allow_pickle=False)
+np.save(os.path.join(mel_dir, filename), mel_spectrogram, allow_pickle=False)

# Return a tuple describing this training example
return (filename, time_steps, mel_frames, text)
2 changes: 1 addition & 1 deletion scripts/train_tacotron2.sh
Original file line number Diff line number Diff line change
@@ -1 +1 @@
-CUDA_VISIBLE_DEVICES=0 python train.py --amp-run -o logs --init-lr 1e-3 --final-lr 1e-5 --epochs 200 -bs 32 --weight-decay 1e-6 --log-file nvlog.json --dataset-path training_data --training-anchor-dirs tts_fanfanli_22050 tts_xiaoya_22050 tts_yangluzhuo_22050 tts_yuanzhonglu_22050
+CUDA_VISIBLE_DEVICES=0 python train.py --amp-run -o logs --init-lr 1e-3 --final-lr 1e-5 --epochs 200 -bs 32 --weight-decay 1e-6 --log-file nvlog.json --dataset-path training_data --training-anchor-dirs --load-mel-from-disk tts_fanfanli_22050 tts_xiaoya_22050 tts_yangluzhuo_22050 tts_yuanzhonglu_22050
2 changes: 1 addition & 1 deletion tacotron2/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def parse_tacotron2_args(parent, add_help=False):
# misc parameters
parser.add_argument('--mask-padding', default=False, type=bool, help='Use mask padding')
parser.add_argument('--n-mel-channels', default=80, type=int, help='Number of bins in mel-spectrograms')
-parser.add_argument('--mel_pad_val', default=-11.5129, type=float, help='Corresponding to silence')
+parser.add_argument('--mel_pad_val', default=-5, type=float, help='Corresponding to silence')

# symbols parameters
global symbols
Expand Down

0 comments on commit 55ff380

Please sign in to comment.