-
Notifications
You must be signed in to change notification settings - Fork 1.2k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add config.yml for global configuration. (#62)
* Add config.yml for global configuration. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix bug in webui.py. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Rename config.yml to default_config.yml. Add ./config.yml to gitignore. * Add config.py to parse config.yml * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
- Loading branch information
1 parent
d3d0e78
commit ec5ec86
Showing
5 changed files
with
321 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -166,3 +166,4 @@ cython_debug/ | |
filelists/* | ||
!/filelists/esd.list | ||
data/* | ||
/config.yml |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,183 @@ | ||
""" | ||
@Desc: 全局配置文件读取 | ||
""" | ||
import argparse
import os
import shutil
from typing import Any, Dict, List

import yaml
|
||
|
||
class Resample_config:
    """Audio resampling configuration."""

    def __init__(self, in_dir: str, out_dir: str, sampling_rate: int = 44100):
        self.sampling_rate: int = sampling_rate  # target sampling rate
        self.in_dir: str = in_dir  # directory containing the audio to process
        self.out_dir: str = out_dir  # directory receiving the resampled audio

    @classmethod
    def from_dict(cls, dataset_path: str, data: Dict[str, Any]):
        """Build an instance from a mapping of constructor arguments.

        ``in_dir`` and ``out_dir`` are joined onto ``dataset_path``. The
        caller's mapping is copied first, so it is never modified and can be
        passed in again without the prefix being applied twice.

        Raises:
            KeyError: if ``in_dir`` or ``out_dir`` is missing from ``data``.
        """
        # Path validity is deliberately not checked here; per the original
        # note, that check lives in resample.py.
        resolved = dict(data)
        for key in ("in_dir", "out_dir"):
            resolved[key] = os.path.join(dataset_path, resolved[key])
        return cls(**resolved)
|
||
|
||
class Preprocess_text_config:
    """Text preprocessing configuration."""

    def __init__(
        self,
        transcription_path: str,
        cleaned_path: str,
        train_path: str,
        val_path: str,
        config_path: str,
        val_per_spk: int = 5,
        max_val_total: int = 10000,
        clean: bool = True,
    ):
        # Raw transcription file; each line is expected to look like
        # {wav_path}|{speaker_name}|{language}|{text}.
        self.transcription_path: str = transcription_path
        # Output path for the cleaned text. May be left blank, in which case
        # it is meant to be generated next to the raw transcription file.
        self.cleaned_path: str = cleaned_path
        # Training list path. May be left blank (same fallback as above).
        self.train_path: str = train_path
        # Validation list path. May be left blank (same fallback as above).
        self.val_path: str = val_path
        self.config_path: str = config_path  # configuration file path
        self.val_per_spk: int = val_per_spk  # validation items per speaker
        # Upper bound on the validation set; the excess is truncated and
        # moved into the training set.
        self.max_val_total: int = max_val_total
        self.clean: bool = clean  # whether to run text cleaning

    @classmethod
    def from_dict(cls, dataset_path: str, data: Dict[str, Any]):
        """Build an instance from a mapping of constructor arguments.

        Every ``*_path`` entry is joined onto ``dataset_path``. The caller's
        mapping is copied first, so it is never modified and can be passed in
        again without the prefix being applied twice.

        Raises:
            KeyError: if any of the five path entries is missing from ``data``.
        """
        # NOTE(review): a blank path still goes through os.path.join, which
        # yields dataset_path plus a trailing separator rather than an empty
        # value. Confirm the consumer treats that as "not filled in".
        path_keys = (
            "transcription_path",
            "cleaned_path",
            "train_path",
            "val_path",
            "config_path",
        )
        resolved = dict(data)
        for key in path_keys:
            resolved[key] = os.path.join(dataset_path, resolved[key])
        return cls(**resolved)
|
||
|
||
class Bert_gen_config:
    """bert_gen configuration."""

    def __init__(
        self,
        config_path: str,
        num_processes: int = 2,
        device: str = "cuda",
    ):
        self.config_path: str = config_path  # training dataset config file path
        self.num_processes: int = num_processes  # number of parallel processes
        self.device: str = device  # device to use, e.g. "cuda" or "cpu"

    @classmethod
    def from_dict(cls, dataset_path: str, data: Dict[str, Any]):
        """Build an instance from a mapping of constructor arguments.

        ``config_path`` is joined onto ``dataset_path``. The caller's mapping
        is copied first, so it is never modified.

        Raises:
            KeyError: if ``config_path`` is missing from ``data``.
        """
        resolved = dict(data)
        resolved["config_path"] = os.path.join(dataset_path, resolved["config_path"])
        return cls(**resolved)
|
||
|
||
class Train_ms_config:
    """Training configuration."""

    def __init__(
        self,
        config_path: str,
        env: Dict[str, Any],
        model: str,
    ):
        self.env: Dict[str, Any] = env  # environment variables to load
        self.model: str = model  # directory where trained models are stored
        self.config_path: str = config_path  # configuration file path

    @classmethod
    def from_dict(cls, dataset_path: str, data: Dict[str, Any]):
        """Build an instance from a mapping of constructor arguments.

        ``model`` and ``config_path`` are joined onto ``dataset_path``. The
        caller's mapping is copied first (shallowly), so its path entries are
        never overwritten.

        Raises:
            KeyError: if ``model`` or ``config_path`` is missing from ``data``.
        """
        resolved = dict(data)
        for key in ("model", "config_path"):
            resolved[key] = os.path.join(dataset_path, resolved[key])
        return cls(**resolved)
|
||
|
||
class Webui_config:
    """webui configuration."""

    def __init__(
        self,
        model: str,
        config_path: str,
        port: int = 7860,
        share: bool = False,
        debug: bool = False,
    ):
        self.model: str = model  # model path
        self.config_path: str = config_path  # configuration file path
        self.port: int = port  # port number
        self.share: bool = share  # whether to deploy publicly (reachable from outside)
        self.debug: bool = debug  # whether to enable debug mode

    @classmethod
    def from_dict(cls, dataset_path: str, data: Dict[str, Any]):
        """Build an instance from a mapping of constructor arguments.

        ``config_path`` and ``model`` are joined onto ``dataset_path``. The
        caller's mapping is copied first, so it is never modified and can be
        passed in again without the prefix being applied twice.

        Raises:
            KeyError: if ``config_path`` or ``model`` is missing from ``data``.
        """
        resolved = dict(data)
        for key in ("config_path", "model"):
            resolved[key] = os.path.join(dataset_path, resolved[key])
        return cls(**resolved)
|
||
|
||
class Server_config:
    """API server configuration."""

    def __init__(
        self, models: List[Dict[str, Any]], port: int = 5000, device: str = "cuda"
    ):
        self.models: List[Dict[str, Any]] = models  # settings of every model to load
        self.port: int = port  # port number
        self.device: str = device  # default device for models

    @classmethod
    def from_dict(cls, data: Dict[str, Any]):
        """Build an instance from a mapping of constructor arguments.

        Unlike the other sections, no dataset path is applied here: the values
        are passed to the constructor exactly as given.
        """
        return cls(**data)
|
||
|
||
class Config:
    """Global configuration loaded from a YAML file.

    Each top-level section of the file is parsed into its own config object
    and exposed as an attribute.
    """

    def __init__(self, config_path: str):
        """Load ``config_path`` and build every section's config object.

        If ``config_path`` does not exist but ``default_config.yml`` is found
        in the current working directory, the default is copied to
        ``config_path`` first and then loaded.

        Raises:
            FileNotFoundError: if neither file exists.
            KeyError: if a required section is missing from the YAML file.
        """
        if not os.path.isfile(config_path) and os.path.isfile("default_config.yml"):
            shutil.copy(src="default_config.yml", dst=config_path)
            print(f"已根据默认配置文件default_config.yml生成配置文件{config_path}")

        # Only the parse needs the file handle; close it before building the
        # section objects.
        with open(file=config_path, mode="r", encoding="utf-8") as file:
            yaml_config: Dict[str, Any] = yaml.safe_load(file.read())

        # dataset_path is documented as optional: when it is absent or empty,
        # fall back to "" so that os.path.join leaves the section paths
        # relative to the working directory instead of raising.
        dataset_path: str = yaml_config.get("dataset_path") or ""

        self.resample_config: Resample_config = Resample_config.from_dict(
            dataset_path, yaml_config["resample"]
        )
        self.preprocess_text_config: Preprocess_text_config = (
            Preprocess_text_config.from_dict(
                dataset_path, yaml_config["preprocess_text"]
            )
        )
        self.bert_gen_config: Bert_gen_config = Bert_gen_config.from_dict(
            dataset_path, yaml_config["bert_gen"]
        )
        self.train_ms_config: Train_ms_config = Train_ms_config.from_dict(
            dataset_path, yaml_config["train_ms"]
        )
        self.web_ui_config: Webui_config = Webui_config.from_dict(
            dataset_path, yaml_config["webui"]
        )
        # The server section takes no dataset_path; its paths are used as given.
        self.server_config: Server_config = Server_config.from_dict(
            yaml_config["server"]
        )
|
||
|
||
# The configuration is loaded as a side effect of importing this module.
# parse_known_args() is used instead of parse_args() so that command-line
# options belonging to whichever script imports this module are ignored
# here rather than aborting the process with "unrecognized arguments".
parser = argparse.ArgumentParser()
parser.add_argument("-c", "--config", type=str, default="config.yml")
args, _ = parser.parse_known_args()
config = Config(args.config)

if __name__ == "__main__":
    # Running this module directly just loads the configuration above.
    pass
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,133 @@ | ||
# 全局配置 | ||
# 对于希望在同一时间使用多个配置文件的情况,例如两个GPU同时跑两个训练集:通过命令行参数 -c/--config 指定配置文件,不指定则默认为./config.yml
|
||
# 拟提供通用路径配置,统一存放数据,避免数据放得很乱 | ||
# 每个数据集与其对应的模型存放至统一路径下,后续的路径配置(server 配置除外)均为相对于dataset_path的路径
# 不填或者填空则路径为相对于项目根目录的路径 | ||
dataset_path: "Data/你的数据集" | ||
|
||
|
||
# resample 音频重采样配置 | ||
# 注意, “:” 后需要加空格 | ||
resample: | ||
# 目标重采样率 | ||
sampling_rate: 44100 | ||
# 音频文件输入路径,重采样会将该路径下所有.wav音频文件重采样 | ||
# 请填入相对于dataset_path的相对路径
in_dir: "audios/raw" # 相对于根目录的路径为 /datasetPath/in_dir | ||
# 音频文件重采样后输出路径 | ||
out_dir: "audios/wavs" | ||
|
||
|
||
# preprocess_text 数据集预处理相关配置 | ||
# 注意, “:” 后需要加空格 | ||
preprocess_text: | ||
# 原始文本文件路径,文本格式应为{wav_path}|{speaker_name}|{language}|{text}。 | ||
transcription_path: "filelists/你的数据集文本.list" | ||
# 数据清洗后文本路径,可以不填。不填则将在原始文本目录生成 | ||
cleaned_path: "" | ||
# 训练集路径,可以不填。不填则将在原始文本目录生成 | ||
train_path: "" | ||
# 验证集路径,可以不填。不填则将在原始文本目录生成 | ||
val_path: "" | ||
# 配置文件路径 | ||
config_path: "config.json" | ||
# 每个speaker的验证集条数 | ||
val_per_spk: 5 | ||
# 验证集最大条数,多于的会被截断并放到训练集中 | ||
max_val_total: 10000 | ||
# 是否进行数据清洗 | ||
clean: true | ||
|
||
|
||
# bert_gen 相关配置 | ||
# 注意, “:” 后需要加空格 | ||
bert_gen: | ||
# 训练数据集配置文件路径 | ||
config_path: "config.json" | ||
# 并行数 | ||
num_processes: 2 | ||
# 使用设备:可选项 "cuda" 显卡推理, "cpu" cpu推理 | ||
# 此配置会影响所有使用bert的任务,包括bert_gen、train_ms、web_ui、api | ||
device: "cuda" | ||
|
||
|
||
# train 训练配置 | ||
# 注意, “:” 后需要加空格 | ||
train_ms: | ||
# 需要加载的环境变量,多显卡训练,RANK推荐手动填写 | ||
env: | ||
MASTER_ADDR: "localhost" | ||
MASTER_PORT: 10086 | ||
WORLD_SIZE: 1 | ||
RANK: 0 | ||
# 可以填写任意名的环境变量 | ||
THE_ENV_VAR_YOU_NEED_TO_USE: "1234567" | ||
# 训练模型存储目录:与旧版本的区别,原先数据集是存放在logs/model_name下的,现在改为统一存放在Data/你的数据集/models下 | ||
model: "models" | ||
# 配置文件路径 | ||
config_path: "config.json" | ||
|
||
|
||
# webui webui配置 | ||
# 注意, “:” 后需要加空格 | ||
webui: | ||
# 端口号 | ||
port: 7860 | ||
# 是否公开部署,对外网开放 | ||
share: false | ||
# 是否开启debug模式 | ||
debug: false | ||
# 模型路径 | ||
model: "models/G_8000.pth" | ||
# 配置文件路径 | ||
config_path: "config.json" | ||
|
||
|
||
# server api配置 | ||
# 注意, “:” 后需要加空格 | ||
# 注意,本配置下的所有配置均为相对于根目录的路径 | ||
server: | ||
# 端口号 | ||
port: 5000 | ||
# 模型默认使用设备 | ||
device: "cuda" | ||
# 需要加载的所有模型的配置 | ||
models: | ||
- # 模型的路径 | ||
model: "" | ||
# 模型config.json的路径 | ||
config: "" | ||
# 模型使用设备,若填写则会覆盖默认配置 | ||
device: "cuda" | ||
# 模型默认使用的语言 | ||
language: "ZH" | ||
# 模型人物默认参数 | ||
# 不必填写所有人物,不填的使用默认值 | ||
speakers: | ||
- speaker: "科比" | ||
sdp_ratio: 0.2 | ||
noise_scale: 0.6 | ||
noise_scale_w: 0.8 | ||
length_scale: 1 | ||
- speaker: "五条悟" | ||
sdp_ratio: 0.3 | ||
noise_scale: 0.7 | ||
noise_scale_w: 0.8 | ||
length_scale: 0.5 | ||
- speaker: "安倍晋三" | ||
sdp_ratio: 0.2 | ||
noise_scale: 0.6 | ||
noise_scale_w: 0.8 | ||
length_scale: 1.2 | ||
- # 模型的路径 | ||
model: "" | ||
# 模型config.json的路径 | ||
config: "" | ||
# 模型使用设备,若填写则会覆盖默认配置 | ||
device: "cpu" | ||
# 模型默认使用的语言 | ||
language: "JP" | ||
# 模型人物默认参数 | ||
# 不必填写所有人物,不填的使用默认值 | ||
speakers: [ ] # 也可以不填 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,3 +21,4 @@ unidic-lite | |
cmudict | ||
fugashi | ||
num2words | ||
PyYAML |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters