Skip to content

Commit

Permalink
Add config.yml for global configuration. (#62)
Browse files Browse the repository at this point in the history
* Add config.yml for global configuration.

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Fix bug in webui.py.

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

* Rename config.yml to default_config.yml. Add ./config.yml to gitignore.

* Add config.py to parse config.yml

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
jiangyuxiaoxiao and pre-commit-ci[bot] authored Oct 13, 2023
1 parent d3d0e78 commit ec5ec86
Show file tree
Hide file tree
Showing 5 changed files with 321 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -166,3 +166,4 @@ cython_debug/
filelists/*
!/filelists/esd.list
data/*
/config.yml
183 changes: 183 additions & 0 deletions config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
"""
@Desc: 全局配置文件读取
"""
import argparse
import os
import shutil
from typing import Any, Dict, List

import yaml


class Resample_config:
    """Settings for the audio resampling step.

    Attributes:
        sampling_rate: Target sampling rate.
        in_dir: Directory holding the audio files to process.
        out_dir: Directory the resampled audio is written to.
    """

    def __init__(self, in_dir: str, out_dir: str, sampling_rate: int = 44100):
        self.sampling_rate: int = sampling_rate  # target sampling rate
        self.in_dir: str = in_dir  # directory of the audio to process
        self.out_dir: str = out_dir  # output directory for resampled audio

    @classmethod
    def from_dict(cls, dataset_path: str, data: Dict[str, Any]):
        """Create an instance from the ``resample`` section of the config.

        Args:
            dataset_path: Base directory that ``in_dir`` and ``out_dir`` are
                joined onto.
            data: Mapping with ``in_dir``, ``out_dir`` and, optionally,
                ``sampling_rate``. It is not modified.

        Returns:
            A new ``Resample_config``.
        """
        # Copy first: rewriting the caller's dict in place would prefix the
        # paths a second time if the same mapping were passed in again.
        settings = dict(data)

        # Path validity is deliberately not checked here; per the original
        # note, that is handled in resample.py.
        for key in ("in_dir", "out_dir"):
            settings[key] = os.path.join(dataset_path, settings[key])

        return cls(**settings)


class Preprocess_text_config:
    """Settings for the dataset text preprocessing step.

    Attributes:
        transcription_path: Raw transcription file. Each line is expected to
            look like ``{wav_path}|{speaker_name}|{language}|{text}``.
        cleaned_path: Output path for the cleaned text. May be left empty, in
            which case it is meant to be generated next to the transcription.
        train_path: Training list path. May be left empty (same rule).
        val_path: Validation list path. May be left empty (same rule).
        config_path: Path of the configuration file.
        val_per_spk: Number of validation entries per speaker.
        max_val_total: Maximum number of validation entries; the surplus is
            truncated and moved to the training set.
        clean: Whether to run data cleaning.
    """

    # Path options that the configuration allows to be left blank.
    _OPTIONAL_PATHS = ("cleaned_path", "train_path", "val_path")

    def __init__(
        self,
        transcription_path: str,
        cleaned_path: str,
        train_path: str,
        val_path: str,
        config_path: str,
        val_per_spk: int = 5,
        max_val_total: int = 10000,
        clean: bool = True,
    ):
        self.transcription_path: str = transcription_path  # raw transcription file
        self.cleaned_path: str = cleaned_path  # cleaned text path, may be empty
        self.train_path: str = train_path  # training list path, may be empty
        self.val_path: str = val_path  # validation list path, may be empty
        self.config_path: str = config_path  # configuration file path
        self.val_per_spk: int = val_per_spk  # validation entries per speaker
        self.max_val_total: int = max_val_total  # cap on validation entries
        self.clean: bool = clean  # whether to run data cleaning

    @classmethod
    def from_dict(cls, dataset_path: str, data: Dict[str, Any]):
        """Create an instance from the ``preprocess_text`` config section.

        Args:
            dataset_path: Base directory the configured paths are joined onto.
            data: Mapping of the section's options. It is not modified.

        Returns:
            A new ``Preprocess_text_config``. Optional paths that are missing,
            ``None`` or empty are stored as ``""`` so that consumers can still
            tell "not configured" apart from a real path.
        """
        # Copy first so the caller's mapping is never rewritten in place.
        settings = dict(data)

        for key in ("transcription_path", "config_path"):
            settings[key] = os.path.join(dataset_path, settings[key])

        for key in cls._OPTIONAL_PATHS:
            value = settings.get(key)
            # Joining an empty value would yield "<dataset_path>/", which hides
            # the fact that the option was left blank; a YAML null would make
            # os.path.join raise TypeError.
            settings[key] = os.path.join(dataset_path, value) if value else ""

        return cls(**settings)


class Bert_gen_config:
    """Settings for the ``bert_gen`` step.

    Attributes:
        config_path: Path of the training dataset configuration file.
        num_processes: Number of parallel processes.
        device: Device name, e.g. ``"cuda"`` or ``"cpu"``.
    """

    def __init__(
        self,
        config_path: str,
        num_processes: int = 2,
        device: str = "cuda",
    ):
        self.config_path = config_path
        self.num_processes = num_processes
        self.device = device

    @classmethod
    def from_dict(cls, dataset_path: str, data: Dict[str, Any]):
        """Create an instance from the ``bert_gen`` config section.

        Args:
            dataset_path: Base directory that ``config_path`` is joined onto.
            data: Mapping of the section's options. It is not modified.

        Returns:
            A new ``Bert_gen_config``.
        """
        # Copy first so the caller's mapping is never rewritten in place.
        settings = dict(data)
        settings["config_path"] = os.path.join(dataset_path, settings["config_path"])

        return cls(**settings)


class Train_ms_config:
    """Settings for the training step.

    Attributes:
        env: Environment variables that need to be loaded.
        model: Directory in which trained models are stored.
        config_path: Path of the configuration file.
    """

    def __init__(
        self,
        config_path: str,
        env: Dict[str, Any],
        model: str,
    ):
        self.env = env  # environment variables to load
        self.model = model  # directory where trained models are stored
        self.config_path = config_path  # configuration file path

    @classmethod
    def from_dict(cls, dataset_path: str, data: Dict[str, Any]):
        """Create an instance from the ``train_ms`` config section.

        Args:
            dataset_path: Base directory that ``model`` and ``config_path`` are
                joined onto.
            data: Mapping with ``env``, ``model`` and ``config_path``. It is
                not modified.

        Returns:
            A new ``Train_ms_config``.
        """
        # Copy first so the caller's mapping is never rewritten in place.
        settings = dict(data)
        for key in ("model", "config_path"):
            settings[key] = os.path.join(dataset_path, settings[key])

        return cls(**settings)


class Webui_config:
    """Settings for the web UI.

    Attributes:
        model: Path of the model.
        config_path: Path of the configuration file.
        port: Port number.
        share: Whether to deploy publicly (open to the external network).
        debug: Whether to enable debug mode.
    """

    def __init__(
        self,
        model: str,
        config_path: str,
        port: int = 7860,
        share: bool = False,
        debug: bool = False,
    ):
        self.model: str = model  # model path
        self.config_path: str = config_path  # configuration file path
        self.port: int = port  # port number
        self.share: bool = share  # whether to deploy publicly
        self.debug: bool = debug  # whether to enable debug mode

    @classmethod
    def from_dict(cls, dataset_path: str, data: Dict[str, Any]):
        """Create an instance from the ``webui`` config section.

        Args:
            dataset_path: Base directory that ``model`` and ``config_path`` are
                joined onto.
            data: Mapping of the section's options. It is not modified.

        Returns:
            A new ``Webui_config``.
        """
        # Copy first so the caller's mapping is never rewritten in place.
        settings = dict(data)
        for key in ("config_path", "model"):
            settings[key] = os.path.join(dataset_path, settings[key])
        return cls(**settings)


class Server_config:
    """Settings for the API server.

    Attributes:
        models: Configuration of every model that should be loaded.
        port: Port number.
        device: Default device for the models.
    """

    def __init__(
        self, models: List[Dict[str, Any]], port: int = 5000, device: str = "cuda"
    ):
        self.models: List[Dict[str, Any]] = models  # configs of all models to load
        self.port: int = port  # port number
        self.device: str = device  # default device for the models

    @classmethod
    def from_dict(cls, data: Dict[str, Any]):
        """Create an instance from the ``server`` config section.

        Unlike the other sections, no dataset path is applied here: the values
        are passed to the constructor unchanged.

        Args:
            data: Mapping with ``models`` and, optionally, ``port`` and
                ``device``.

        Returns:
            A new ``Server_config``.
        """
        return cls(**data)


class Config:
    """Global configuration, loaded from a YAML file.

    Attributes:
        resample_config: Settings from the ``resample`` section.
        preprocess_text_config: Settings from the ``preprocess_text`` section.
        bert_gen_config: Settings from the ``bert_gen`` section.
        train_ms_config: Settings from the ``train_ms`` section.
        web_ui_config: Settings from the ``webui`` section.
        server_config: Settings from the ``server`` section.
    """

    def __init__(self, config_path: str):
        """Load the configuration at ``config_path``.

        If ``config_path`` does not exist but ``default_config.yml`` is present
        in the current working directory, the default file is copied to
        ``config_path`` first.

        Args:
            config_path: Path of the YAML configuration file.

        Raises:
            FileNotFoundError: If neither ``config_path`` nor the default
                configuration file exists.
            KeyError: If one of the expected sections is missing.
        """
        if not os.path.isfile(config_path) and os.path.isfile("default_config.yml"):
            shutil.copy(src="default_config.yml", dst=config_path)
            print(f"已根据默认配置文件default_config.yml生成配置文件{config_path}")

        with open(file=config_path, mode="r", encoding="utf-8") as file:
            yaml_config: Dict[str, Any] = yaml.safe_load(file.read())

        # The default configuration documents that dataset_path may be omitted
        # or left empty, meaning paths are relative to the project root. An
        # empty base makes os.path.join return the relative path unchanged; a
        # missing key or YAML null would otherwise raise KeyError / TypeError.
        dataset_path: str = yaml_config.get("dataset_path") or ""

        self.resample_config: Resample_config = Resample_config.from_dict(
            dataset_path, yaml_config["resample"]
        )
        self.preprocess_text_config: Preprocess_text_config = (
            Preprocess_text_config.from_dict(
                dataset_path, yaml_config["preprocess_text"]
            )
        )
        self.bert_gen_config: Bert_gen_config = Bert_gen_config.from_dict(
            dataset_path, yaml_config["bert_gen"]
        )
        self.train_ms_config: Train_ms_config = Train_ms_config.from_dict(
            dataset_path, yaml_config["train_ms"]
        )
        self.web_ui_config: Webui_config = Webui_config.from_dict(
            dataset_path, yaml_config["webui"]
        )
        # The server section is not tied to dataset_path.
        self.server_config: Server_config = Server_config.from_dict(
            yaml_config["server"]
        )


# The configuration is loaded once, at import time, and shared as `config`.
parser = argparse.ArgumentParser()
parser.add_argument("-c", "--config", type=str, default="config.yml")
# parse_known_args() rather than parse_args(): this module is imported by
# programs that may define their own command-line options, and parse_args()
# would exit with an "unrecognized arguments" error on any of them.
args, _ = parser.parse_known_args()
config = Config(args.config)

if __name__ == "__main__":
    # Running this module directly only checks that the configuration loads.
    pass
133 changes: 133 additions & 0 deletions default_config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
# Global configuration
# To use several configuration files at the same time (for example two GPUs
# training two datasets), pass the file with the -c/--config command-line
# option; if it is not given, ./config.yml is used.

# Common path setting: keeps all data in one place instead of scattered around.
# Each dataset and its models live under one directory; every path configured
# below is relative to dataset_path (the server section is the exception).
# If omitted or left empty, paths are relative to the project root.
dataset_path: "Data/你的数据集"


# resample: audio resampling settings
# Note: a space is required after ":"
resample:
  # Target sampling rate
  sampling_rate: 44100
  # Audio input directory; every .wav file in it will be resampled.
  # Give a path relative to dataset_path.
  in_dir: "audios/raw" # relative to the project root this is <dataset_path>/<in_dir>
  # Output directory for the resampled audio
  out_dir: "audios/wavs"


# preprocess_text: dataset preprocessing settings
# Note: a space is required after ":"
preprocess_text:
  # Raw transcription file; each line should be {wav_path}|{speaker_name}|{language}|{text}.
  transcription_path: "filelists/你的数据集文本.list"
  # Path of the cleaned text. Optional: if empty, it is generated next to the raw transcription.
  cleaned_path: ""
  # Training list path. Optional: if empty, it is generated next to the raw transcription.
  train_path: ""
  # Validation list path. Optional: if empty, it is generated next to the raw transcription.
  val_path: ""
  # Configuration file path
  config_path: "config.json"
  # Number of validation entries per speaker
  val_per_spk: 5
  # Maximum number of validation entries; the surplus is truncated and moved to the training set
  max_val_total: 10000
  # Whether to run data cleaning
  clean: true


# bert_gen settings
# Note: a space is required after ":"
bert_gen:
  # Path of the training dataset configuration file
  config_path: "config.json"
  # Number of parallel processes
  num_processes: 2
  # Device: "cuda" for GPU inference, "cpu" for CPU inference.
  # This setting affects every task that uses bert, including bert_gen, train_ms, web_ui and api.
  device: "cuda"


# train_ms: training settings
# Note: a space is required after ":"
train_ms:
  # Environment variables to load; for multi-GPU training, setting RANK by hand is recommended.
  env:
    MASTER_ADDR: "localhost"
    MASTER_PORT: 10086
    WORLD_SIZE: 1
    RANK: 0
    # Environment variables with any name may be added
    THE_ENV_VAR_YOU_NEED_TO_USE: "1234567"
  # Directory where trained models are stored. Difference from older versions:
  # data used to live under logs/model_name; it now lives under Data/你的数据集/models.
  model: "models"
  # Configuration file path
  config_path: "config.json"


# webui: web UI settings
# Note: a space is required after ":"
webui:
  # Port number
  port: 7860
  # Whether to deploy publicly (open to the external network)
  share: false
  # Whether to enable debug mode
  debug: false
  # Model path
  model: "models/G_8000.pth"
  # Configuration file path
  config_path: "config.json"


# server: API settings
# Note: a space is required after ":"
# Note: every path in this section is relative to the project root
server:
  # Port number
  port: 5000
  # Default device for the models
  device: "cuda"
  # Configuration of every model that should be loaded
  models:
    - # Model path
      model: ""
      # Path of the model's config.json
      config: ""
      # Device for this model; if set, it overrides the default above
      device: "cuda"
      # Default language of the model
      language: "ZH"
      # Default parameters per speaker.
      # Not every speaker has to be listed; unlisted ones use the defaults.
      speakers:
        - speaker: "科比"
          sdp_ratio: 0.2
          noise_scale: 0.6
          noise_scale_w: 0.8
          length_scale: 1
        - speaker: "五条悟"
          sdp_ratio: 0.3
          noise_scale: 0.7
          noise_scale_w: 0.8
          length_scale: 0.5
        - speaker: "安倍晋三"
          sdp_ratio: 0.2
          noise_scale: 0.6
          noise_scale_w: 0.8
          length_scale: 1.2
    - # Model path
      model: ""
      # Path of the model's config.json
      config: ""
      # Device for this model; if set, it overrides the default above
      device: "cpu"
      # Default language of the model
      language: "JP"
      # Default parameters per speaker.
      # Not every speaker has to be listed; unlisted ones use the defaults.
      speakers: [ ] # may also be left empty
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,4 @@ unidic-lite
cmudict
fugashi
num2words
PyYAML
3 changes: 3 additions & 0 deletions webui.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,9 @@ def tts_fn(
length_scale=length_scale,
sid=speaker,
language=language,
hps=hps,
net_g=net_g,
device=device,
)
audio_list.append(audio)
silence = np.zeros(hps.data.sampling_rate) # 生成1秒的静音
Expand Down

0 comments on commit ec5ec86

Please sign in to comment.