-
Notifications
You must be signed in to change notification settings - Fork 14
/
hubconf.py
35 lines (26 loc) · 1.21 KB
/
hubconf.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
# torch.hub convention for hubconf.py: names of the packages that must be
# importable before any entrypoint defined in this file can be loaded.
dependencies = ['torch', 'torchaudio', 'vocos', 'encodec']
def supervoice():
    """Assemble the pretrained Supervoice model (torch.hub entrypoint).

    Builds the text tokenizer from the ``tokenizer_text.model`` file that
    sits next to this file, loads the Vocos vocoder and the 24 kHz EnCodec
    model (target bandwidth set to 6.0), downloads the AR and NAR
    checkpoints onto the CPU and loads their ``'model'`` state dicts, then
    wires everything into a ``Supervoice`` instance.

    Returns:
        The ``Supervoice`` instance, after ``eval()`` has been called on it.
    """
    # Imports are kept local to the entrypoint so that merely importing this
    # file does not pull in the heavy packages.
    import os
    import torch
    from supervoice_valle import SupervoceNARModel, SupervoceARModel, Tokenizer, Supervoice
    from vocos import Vocos
    from encodec import EncodecModel

    # Checkpoint locations
    ar_url = "https://shared.korshakov.com/models/supervoice-valle-ar-600000.pt"
    nar_url = "https://shared.korshakov.com/models/supervoice-valle-nar-600000.pt"

    # Text tokenizer: its model file is resolved relative to this file
    here = os.path.dirname(__file__)
    text_tokenizer = Tokenizer(os.path.join(here, "tokenizer_text.model"))

    # Vocoder and audio codec
    vocoder = Vocos.from_pretrained("charactr/vocos-encodec-24khz")
    codec = EncodecModel.encodec_model_24khz()
    codec.set_target_bandwidth(6.0)

    # Instantiate both networks, fetch both checkpoints, then restore weights
    ar_net = SupervoceARModel()
    nar_net = SupervoceNARModel()
    ar_ckpt = torch.hub.load_state_dict_from_url(ar_url, map_location="cpu")
    nar_ckpt = torch.hub.load_state_dict_from_url(nar_url, map_location="cpu")
    ar_net.load_state_dict(ar_ckpt['model'])
    nar_net.load_state_dict(nar_ckpt['model'])

    # Combine the parts and hand back the model in eval mode
    tts = Supervoice(ar_net, nar_net, codec, vocoder, text_tokenizer)
    tts.eval()
    return tts