Skip to content

Commit

Permalink
Fix voice conversion inference
Browse files Browse the repository at this point in the history
  • Loading branch information
Edresson committed May 20, 2022
1 parent e5d8ec2 commit 808ae01
Show file tree
Hide file tree
Showing 3 changed files with 4 additions and 4 deletions.
2 changes: 1 addition & 1 deletion TTS/bin/synthesize.py
Original file line number Diff line number Diff line change
Expand Up @@ -171,7 +171,7 @@ def main():
help="wav file(s) to condition a multi-speaker TTS model with a Speaker Encoder. You can give multiple file paths. The d_vectors is computed as their average.",
default=None,
)
parser.add_argument("--gst_style", help="Wav path file for GST stylereference.", default=None)
parser.add_argument("--gst_style", help="Wav path file for GST style reference.", default=None)
parser.add_argument(
"--list_speaker_idxs",
help="List available speaker ids for the defined multi-speaker model.",
Expand Down
4 changes: 2 additions & 2 deletions TTS/tts/models/vits.py
Original file line number Diff line number Diff line change
Expand Up @@ -1127,7 +1127,7 @@ def inference_voice_conversion(
self.config.audio.hop_length,
self.config.audio.win_length,
center=False,
).transpose(1, 2)
)
y_lengths = torch.tensor([y.size(-1)]).to(y.device)
speaker_cond_src = reference_speaker_id if reference_speaker_id is not None else reference_d_vector
speaker_cond_tgt = speaker_id if speaker_id is not None else d_vector
Expand Down Expand Up @@ -1157,7 +1157,7 @@ def voice_conversion(self, y, y_lengths, speaker_cond_src, speaker_cond_tgt):
else:
raise RuntimeError(" [!] Voice conversion is only supported on multi-speaker models.")

z, _, _, y_mask = self.posterior_encoder(y.transpose(1, 2), y_lengths, g=g_src)
z, _, _, y_mask = self.posterior_encoder(y, y_lengths, g=g_src)
z_p = self.flow(z, y_mask, g=g_src)
z_hat = self.flow(z_p, y_mask, g=g_tgt, reverse=True)
o_hat = self.waveform_decoder(z_hat * y_mask, g=g_tgt)
Expand Down
2 changes: 1 addition & 1 deletion TTS/utils/synthesizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ def tts(
# get the speaker embedding or speaker id for the reference wav file
reference_speaker_embedding = None
reference_speaker_id = None
if self.tts_speakers_file or hasattr(self.tts_model.speaker_manager, "speaker_ids"):
if self.tts_speakers_file or hasattr(self.tts_model.speaker_manager, "ids"):
if reference_speaker_name and isinstance(reference_speaker_name, str):
if self.tts_config.use_d_vector_file:
# get the speaker embedding from the saved d_vectors.
Expand Down

0 comments on commit 808ae01

Please sign in to comment.