
Change hop_size to 256 for compatibility with MelGAN
Signed-off-by: begeekmyfriend <[email protected]>
begeekmyfriend committed Dec 12, 2019
1 parent 8e954ad commit f6cb1a3
Showing 2 changed files with 11 additions and 14 deletions.
hparams.py (16 changes: 8 additions & 8 deletions)
@@ -18,12 +18,12 @@
 
 # Settings for all models
 sample_rate = 22050
-n_fft = 2048
+n_fft = 1024
 fft_bins = n_fft // 2 + 1
 num_mels = 80
-mel_bias = 1
-hop_length = 275                    # 12.5ms - in line with Tacotron 2 paper
-win_length = 1100                   # 50ms - same reason as above
+mel_bias = 2
+hop_length = 256                    # ~11.6ms at 22050Hz - the MelGAN-compatible hop
+win_length = 1024                   # ~46.4ms - matches n_fft
 fmin = 50
 min_level_db = -120
 ref_level_db = 20
@@ -37,7 +37,7 @@
 
 # Model Hparams
 voc_mode = 'RAW'                    # either 'RAW' (softmax on raw bits) or 'MOL' (sample from mixture of logistics)
-voc_upsample_factors = (5, 5, 11)   # NB - this needs to correctly factorise hop_length
+voc_upsample_factors = (4, 8, 8)    # NB - this needs to correctly factorise hop_length
 voc_rnn_dims = 512
 voc_fc_dims = 512
 voc_compute_dims = 128
@@ -47,7 +47,7 @@
 
 # Training
 voc_batch_size = 64
 voc_lr = 1e-4
-voc_checkpoint_every = 20_000
+voc_checkpoint_every = 25_000
 voc_gen_at_checkpoint = 5           # number of samples to generate at each checkpoint
 voc_total_steps = 1_000_000         # Total number of training steps
 voc_test_samples = 50               # How many unseen samples to put aside for testing
@@ -57,8 +57,8 @@
 
 # Generating / Synthesizing
 voc_gen_batched = True              # very fast (realtime+) single utterance batched generation
-voc_target = 5_500                  # target number of samples to be generated in each batch entry
-voc_overlap = 275                   # number of samples for crossfading between batches
+voc_target = 5_120                  # target number of samples to be generated in each batch entry
+voc_overlap = 256                   # number of samples for crossfading between batches
 
 
 # TACOTRON/TTS -----------------------------------------------------------------------------------------------------#
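Note on consistency: the new upsample factors must multiply out to the new hop (4 * 8 * 8 = 256, just as 5 * 5 * 11 = 275 before), and the batched-generation sizes remain whole multiples of it (voc_target is 20 hops, voc_overlap one hop). A minimal sanity-check sketch, assuming the hparams names above; it is not part of this commit:

    import math

    sample_rate = 22050
    hop_length = 256
    win_length = 1024
    voc_upsample_factors = (4, 8, 8)
    voc_target = 5_120
    voc_overlap = 256

    # The upsampling network stretches each mel frame into exactly one hop
    # of audio, so the factors must multiply out to hop_length.
    assert math.prod(voc_upsample_factors) == hop_length

    # Batched generation folds audio into voc_target-sample segments with
    # voc_overlap samples of crossfade; keeping both as whole frames keeps
    # segment boundaries aligned with mel frames.
    assert voc_target % hop_length == 0
    assert voc_overlap % hop_length == 0

    # Frame timing at 22050 Hz: the new hop is ~11.6 ms (275 was ~12.5 ms).
    print(f"hop: {1000 * hop_length / sample_rate:.1f} ms")
    print(f"win: {1000 * win_length / sample_rate:.1f} ms")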
models/fatchord_version.py (9 changes: 3 additions & 6 deletions)
@@ -121,8 +121,6 @@ def __init__(self, rnn_dims, fc_dims, bits, pad, upsample_factors,
     def forward(self, x, mels) :
         self.step += 1
         bsize = x.size(0)
-        h1 = torch.zeros(1).cuda().repeat(1, bsize, self.rnn_dims)
-        h2 = torch.zeros(1).cuda().repeat(1, bsize, self.rnn_dims)
         mels, aux = self.upsample(mels)
 
         aux_idx = [self.aux_dims * i for i in range(5)]
@@ -134,12 +132,12 @@ def forward(self, x, mels) :
         x = torch.cat([x.unsqueeze(-1), mels, a1], dim=2)
         x = self.I(x)
         res = x
-        x, _ = self.rnn1(x, h1)
+        x, _ = self.rnn1(x)
 
         x = x + res
         res = x
         x = torch.cat([x, a2], dim=2)
-        x, _ = self.rnn2(x, h2)
+        x, _ = self.rnn2(x)
 
         x = x + res
         x = torch.cat([x, a3], dim=2)
@@ -183,8 +181,7 @@ def generate(self, mels, save_path, batched, target, overlap, mu_law):
 
                 m_t = mels[:, i, :]
 
-                a1_t, a2_t, a3_t, a4_t = \
-                    (a[:, i, :] for a in aux_split)
+                a1_t, a2_t, a3_t, a4_t = (a[:, i, :] for a in aux_split)
 
                 x = torch.cat([x, m_t, a1_t], dim=1)
                 x = self.I(x)
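Why the hidden-state lines can go: PyTorch's nn.GRU substitutes a zero initial hidden state when none is passed, so self.rnn1(x) is numerically identical to the old call, and the hard-coded .cuda() that tied forward() to a GPU disappears. An illustrative equivalence check, not taken from the repo:

    import torch
    import torch.nn as nn

    # nn.GRU defaults the initial hidden state to zeros when h0 is omitted,
    # so dropping the explicit h1/h2 is a numerical no-op.
    rnn = nn.GRU(input_size=512, hidden_size=512, batch_first=True)
    x = torch.randn(4, 100, 512)            # (batch, time, features)

    h0 = torch.zeros(1, x.size(0), 512)     # what forward() used to build
    out_explicit, _ = rnn(x, h0)
    out_default, _ = rnn(x)                 # same as passing zeros

    assert torch.allclose(out_explicit, out_default)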
