
Change hop_size to 256 for compatibility with MelGAN
Signed-off-by: begeekmyfriend <[email protected]>
begeekmyfriend committed Dec 12, 2019
1 parent 8e954ad commit f6cb1a3
Showing 2 changed files with 11 additions and 14 deletions.
hparams.py (16 changes: 8 additions & 8 deletions)
@@ -18,12 +18,12 @@
 
 # Settings for all models
 sample_rate = 22050
-n_fft = 2048
+n_fft = 1024
 fft_bins = n_fft // 2 + 1
 num_mels = 80
-mel_bias = 1
-hop_length = 275                    # 12.5ms - in line with Tacotron 2 paper
-win_length = 1100                   # 50ms - same reason as above
+mel_bias = 2
+hop_length = 256                    # ~11.6ms at 22050Hz - the MelGAN-compatible hop
+win_length = 1024                   # ~46.4ms - matches n_fft
 fmin = 50
 min_level_db = -120
 ref_level_db = 20
@@ -37,7 +37,7 @@
 
 # Model Hparams
 voc_mode = 'RAW'                    # either 'RAW' (softmax on raw bits) or 'MOL' (sample from mixture of logistics)
-voc_upsample_factors = (5, 5, 11)   # NB - this needs to correctly factorise hop_length
+voc_upsample_factors = (4, 8, 8)    # NB - this needs to correctly factorise hop_length
 voc_rnn_dims = 512
 voc_fc_dims = 512
 voc_compute_dims = 128
@@ -47,7 +47,7 @@
 
 # Training
 voc_batch_size = 64
 voc_lr = 1e-4
-voc_checkpoint_every = 20_000
+voc_checkpoint_every = 25_000
 voc_gen_at_checkpoint = 5           # number of samples to generate at each checkpoint
 voc_total_steps = 1_000_000         # Total number of training steps
 voc_test_samples = 50               # How many unseen samples to put aside for testing
@@ -57,8 +57,8 @@
 
 # Generating / Synthesizing
 voc_gen_batched = True              # very fast (realtime+) single utterance batched generation
-voc_target = 5_500                  # target number of samples to be generated in each batch entry
-voc_overlap = 275                   # number of samples for crossfading between batches
+voc_target = 5_120                  # target number of samples to be generated in each batch entry
+voc_overlap = 256                   # number of samples for crossfading between batches
 
 
 # TACOTRON/TTS -----------------------------------------------------------------------------------------------------#
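Note on consistency: the new upsample factors must multiply out to the new hop (4 * 8 * 8 = 256, just as 5 * 5 * 11 = 275 before), and the batched-generation sizes remain whole multiples of it (voc_target is 20 hops, voc_overlap one hop). A minimal sanity-check sketch, assuming the hparams names above; it is not part of this commit:

    import math

    sample_rate = 22050
    hop_length = 256
    win_length = 1024
    voc_upsample_factors = (4, 8, 8)
    voc_target = 5_120
    voc_overlap = 256

    # The upsampling network stretches each mel frame into exactly one hop
    # of audio, so the factors must multiply out to hop_length.
    assert math.prod(voc_upsample_factors) == hop_length

    # Batched generation folds audio into voc_target-sample segments with
    # voc_overlap samples of crossfade; keeping both as whole frames keeps
    # segment boundaries aligned with mel frames.
    assert voc_target % hop_length == 0
    assert voc_overlap % hop_length == 0

    # Frame timing at 22050 Hz: the new hop is ~11.6 ms (275 was ~12.5 ms).
    print(f"hop: {1000 * hop_length / sample_rate:.1f} ms")
    print(f"win: {1000 * win_length / sample_rate:.1f} ms")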
models/fatchord_version.py (9 changes: 3 additions & 6 deletions)
@@ -121,8 +121,6 @@ def __init__(self, rnn_dims, fc_dims, bits, pad, upsample_factors,
     def forward(self, x, mels) :
         self.step += 1
         bsize = x.size(0)
-        h1 = torch.zeros(1).cuda().repeat(1, bsize, self.rnn_dims)
-        h2 = torch.zeros(1).cuda().repeat(1, bsize, self.rnn_dims)
         mels, aux = self.upsample(mels)
 
         aux_idx = [self.aux_dims * i for i in range(5)]
@@ -134,12 +132,12 @@ def forward(self, x, mels) :
         x = torch.cat([x.unsqueeze(-1), mels, a1], dim=2)
         x = self.I(x)
         res = x
-        x, _ = self.rnn1(x, h1)
+        x, _ = self.rnn1(x)
 
         x = x + res
         res = x
         x = torch.cat([x, a2], dim=2)
-        x, _ = self.rnn2(x, h2)
+        x, _ = self.rnn2(x)
 
         x = x + res
         x = torch.cat([x, a3], dim=2)
@@ -183,8 +181,7 @@ def generate(self, mels, save_path, batched, target, overlap, mu_law):
 
                 m_t = mels[:, i, :]
 
-                a1_t, a2_t, a3_t, a4_t = \
-                    (a[:, i, :] for a in aux_split)
+                a1_t, a2_t, a3_t, a4_t = (a[:, i, :] for a in aux_split)
 
                 x = torch.cat([x, m_t, a1_t], dim=1)
                 x = self.I(x)
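Why the hidden-state lines can go: PyTorch's nn.GRU substitutes a zero initial hidden state when none is passed, so self.rnn1(x) is numerically identical to the old call, and the hard-coded .cuda() that tied forward() to a GPU disappears. An illustrative equivalence check, not taken from the repo:

    import torch
    import torch.nn as nn

    # nn.GRU defaults the initial hidden state to zeros when h0 is omitted,
    # so dropping the explicit h1/h2 is a numerical no-op.
    rnn = nn.GRU(input_size=512, hidden_size=512, batch_first=True)
    x = torch.randn(4, 100, 512)            # (batch, time, features)

    h0 = torch.zeros(1, x.size(0), 512)     # what forward() used to build
    out_explicit, _ = rnn(x, h0)
    out_default, _ = rnn(x)                 # same as passing zeros

    assert torch.allclose(out_explicit, out_default)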
