Skip to content

Commit

Permalink
load speaker_encoder_ap and compute x_vector directly from the input …
Browse files Browse the repository at this point in the history
…file in speaker manager
  • Loading branch information
erogol committed Apr 23, 2021
1 parent ad047c8 commit c80d21f
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 0 deletions.
16 changes: 16 additions & 0 deletions TTS/tts/utils/speakers.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import torch

from TTS.speaker_encoder.utils.generic_utils import setup_model
from TTS.utils.audio import AudioProcessor
from TTS.utils.io import load_config


Expand Down Expand Up @@ -143,6 +144,7 @@ def __init__(
self.speaker_ids = None
self.clip_ids = None
self.speaker_encoder = None
self.speaker_encoder_ap = None

if x_vectors_file_path:
self.load_x_vectors_file(x_vectors_file_path)
Expand Down Expand Up @@ -230,6 +232,20 @@ def init_speaker_encoder(self, model_path: str, config_path: str):
self.speaker_encoder_config = load_config(config_path)
self.speaker_encoder = setup_model(self.speaker_encoder_config)
self.speaker_encoder.load_checkpoint(config_path, model_path, True)
self.speaker_encoder_ap = AudioProcessor(
**self.speaker_encoder_config.audio)
# normalize the input audio level and trim silences
self.speaker_encoder_ap.do_sound_norm = True
self.speaker_encoder_ap.do_trim_silence = True

def compute_x_vector_from_clip(self, wav_file):
    """Compute a speaker embedding (x-vector) directly from an audio clip.

    The clip is loaded through the speaker encoder's audio processor at that
    processor's own sample rate, converted to a mel spectrogram, and handed
    to the speaker encoder as a single-item batch.

    Args:
        wav_file: Audio clip to embed. It is forwarded unchanged to
            ``speaker_encoder_ap.load_wav`` (presumably a file path --
            confirm against ``AudioProcessor.load_wav``).

    Returns:
        Whatever ``speaker_encoder.compute_embedding`` returns for the clip
        (presumably a tensor holding the x-vector -- confirm against the
        encoder model).

    Raises:
        RuntimeError: If the speaker encoder or its audio processor has not
            been set up yet, i.e. either attribute is still ``None``.
    """
    # Fail early with an actionable message instead of an opaque
    # "'NoneType' object has no attribute 'load_wav'" AttributeError.
    if self.speaker_encoder is None or self.speaker_encoder_ap is None:
        raise RuntimeError(
            "Speaker encoder is not initialized; call `init_speaker_encoder` "
            "before computing x-vectors from audio clips."
        )

    audio_processor = self.speaker_encoder_ap
    waveform = audio_processor.load_wav(
        wav_file, sr=audio_processor.sample_rate)
    mel = audio_processor.melspectrogram(waveform)
    # Transpose the spectrogram and prepend a batch axis of size one before
    # passing it to the encoder.
    batch = torch.from_numpy(mel.T).unsqueeze(0)
    return self.speaker_encoder.compute_embedding(batch)

def compute_x_vector(self, feats):
if isinstance(feats, np.ndarray):
Expand Down
File renamed without changes.

0 comments on commit c80d21f

Please sign in to comment.