diff --git a/hparams.py b/hparams.py
index d61f4eb1..9a3ec617 100644
--- a/hparams.py
+++ b/hparams.py
@@ -18,12 +18,12 @@

 # Settings for all models
 sample_rate = 22050
-n_fft = 2048
+n_fft = 1024
 fft_bins = n_fft // 2 + 1
 num_mels = 80
-mel_bias = 1
-hop_length = 275 # 12.5ms - in line with Tacotron 2 paper
-win_length = 1100 # 50ms - same reason as above
+mel_bias = 2
+hop_length = 256 # ~11.6ms at 22050Hz (Tacotron 2 uses 12.5ms)
+win_length = 1024 # ~46.4ms at 22050Hz
 fmin = 50
 min_level_db = -120
 ref_level_db = 20
@@ -37,7 +37,7 @@

 # Model Hparams
 voc_mode = 'RAW' # either 'RAW' (softmax on raw bits) or 'MOL' (sample from mixture of logistics)
-voc_upsample_factors = (5, 5, 11) # NB - this needs to correctly factorise hop_length
+voc_upsample_factors = (4, 8, 8) # NB - this needs to correctly factorise hop_length
 voc_rnn_dims = 512
 voc_fc_dims = 512
 voc_compute_dims = 128
@@ -47,7 +47,7 @@
 # Training
 voc_batch_size = 64
 voc_lr = 1e-4
-voc_checkpoint_every = 20_000
+voc_checkpoint_every = 25_000
 voc_gen_at_checkpoint = 5 # number of samples to generate at each checkpoint
 voc_total_steps = 1_000_000 # Total number of training steps
 voc_test_samples = 50 # How many unseen samples to put aside for testing
@@ -57,8 +57,8 @@

 # Generating / Synthesizing
 voc_gen_batched = True # very fast (realtime+) single utterance batched generation
-voc_target = 5_500 # target number of samples to be generated in each batch entry
-voc_overlap = 275 # number of samples for crossfading between batches
+voc_target = 5_120 # target number of samples to be generated in each batch entry
+voc_overlap = 256 # number of samples for crossfading between batches


 # TACOTRON/TTS -----------------------------------------------------------------------------------------------------#
diff --git a/models/fatchord_version.py b/models/fatchord_version.py
index 4af61f02..a3711ca5 100644
--- a/models/fatchord_version.py
+++ b/models/fatchord_version.py
@@ -121,8 +121,6 @@ def __init__(self, rnn_dims, fc_dims, bits, pad, upsample_factors,
     def forward(self, x, mels) :
         self.step += 1
         bsize = x.size(0)
-        h1 = torch.zeros(1).cuda().repeat(1, bsize, self.rnn_dims)
-        h2 = torch.zeros(1).cuda().repeat(1, bsize, self.rnn_dims)
         mels, aux = self.upsample(mels)

         aux_idx = [self.aux_dims * i for i in range(5)]
@@ -134,12 +132,12 @@ def forward(self, x, mels) :
         x = torch.cat([x.unsqueeze(-1), mels, a1], dim=2)
         x = self.I(x)
         res = x
-        x, _ = self.rnn1(x, h1)
+        x, _ = self.rnn1(x)

         x = x + res
         res = x
         x = torch.cat([x, a2], dim=2)
-        x, _ = self.rnn2(x, h2)
+        x, _ = self.rnn2(x)

         x = x + res
         x = torch.cat([x, a3], dim=2)
@@ -183,8 +181,7 @@ def generate(self, mels, save_path, batched, target, overlap, mu_law):

            m_t = mels[:, i, :]

-            a1_t, a2_t, a3_t, a4_t = \
-                (a[:, i, :] for a in aux_split)
+            a1_t, a2_t, a3_t, a4_t = (a[:, i, :] for a in aux_split)

            x = torch.cat([x, m_t, a1_t], dim=1)
            x = self.I(x)
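
Note (not part of the patch): a minimal sketch, assuming the values introduced above, of the coupling between these hparams. The upsample network stretches each mel frame by the product of voc_upsample_factors, so that product must equal hop_length (4 * 8 * 8 = 256), matching the "NB" comment in hparams.py; both the old and new configs also keep voc_target and voc_overlap as whole multiples of hop_length so batched-generation chunk boundaries line up with mel frames. The variable names mirror hparams.py, but the checks themselves are illustrative and not code from the repo.

import math

n_fft = 1024
hop_length = 256
win_length = 1024
voc_upsample_factors = (4, 8, 8)
voc_target = 5_120
voc_overlap = 256

# The upsample factors must multiply out to exactly one hop.
assert math.prod(voc_upsample_factors) == hop_length    # 4 * 8 * 8 == 256

# Convention kept by both the old (5_500 / 275) and new (5_120 / 256) values:
# target and overlap are whole multiples of hop_length, so each generation
# chunk covers an integer number of mel frames.
assert voc_target % hop_length == 0 and voc_overlap % hop_length == 0

# The STFT window cannot be longer than the FFT size.
assert win_length <= n_fft

Dropping h1/h2 in forward() relies on PyTorch's documented default: when nn.GRU is called without an initial hidden state it uses zeros, so self.rnn1(x) computes the same result as passing an explicit zero tensor. A small self-contained check (the sizes here are arbitrary, chosen only for the demo):

import torch
import torch.nn as nn

rnn = nn.GRU(input_size=16, hidden_size=32, batch_first=True)
x = torch.randn(2, 5, 16)                       # (batch, seq, features)

out_default, _ = rnn(x)                         # h0 omitted -> zeros
out_zeros, _ = rnn(x, torch.zeros(1, 2, 32))    # explicit zero h0
assert torch.allclose(out_default, out_zeros)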