Add config.yml for global configuration. (#62)

* Add config.yml for global configuration. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Fix bug in webui.py. * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Rename config.yml to default_config.yml. Add ./config.yml to gitignore. * Add config.py to parse config.yml * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
fishaudio · Oct 13, 2023 · ec5ec86 · ec5ec86
1 parent d3d0e78
commit ec5ec86
Show file tree

Hide file tree

Showing 5 changed files with 321 additions and 0 deletions.
diff --git a/.gitignore b/.gitignore
@@ -166,3 +166,4 @@ cython_debug/
 filelists/*
 !/filelists/esd.list
 data/*
+/config.yml
diff --git a/config.py b/config.py
@@ -0,0 +1,183 @@
+"""
+@Desc: 全局配置文件读取
+"""
+import argparse
+import os
+import shutil
+from typing import Any, Dict, List
+
+import yaml
+
+
+class Resample_config:
+    """重采样配置"""
+
+    def __init__(self, in_dir: str, out_dir: str, sampling_rate: int = 44100):
+        self.sampling_rate: int = sampling_rate  # 目标采样率
+        self.in_dir: str = in_dir  # 待处理音频目录路径
+        self.out_dir: str = out_dir  # 重采样输出路径
+
+    @classmethod
+    def from_dict(cls, dataset_path: str, data: Dict[str, any]):
+        """从字典中生成实例"""
+
+        # 不检查路径是否有效，此逻辑在resample.py中处理
+        data["in_dir"] = os.path.join(dataset_path, data["in_dir"])
+        data["out_dir"] = os.path.join(dataset_path, data["out_dir"])
+
+        return cls(**data)
+
+
+class Preprocess_text_config:
+    """数据预处理配置"""
+
+    def __init__(
+        self,
+        transcription_path: str,
+        cleaned_path: str,
+        train_path: str,
+        val_path: str,
+        config_path: str,
+        val_per_spk: int = 5,
+        max_val_total: int = 10000,
+        clean: bool = True,
+    ):
+        self.transcription_path: str = transcription_path  # 原始文本文件路径，文本格式应为{wav_path}|{speaker_name}|{language}|{text}。
+        self.cleaned_path: str = cleaned_path  # 数据清洗后文本路径，可以不填。不填则将在原始文本目录生成
+        self.train_path: str = train_path  # 训练集路径，可以不填。不填则将在原始文本目录生成
+        self.val_path: str = val_path  # 验证集路径，可以不填。不填则将在原始文本目录生成
+        self.config_path: str = config_path  # 配置文件路径
+        self.val_per_spk: int = val_per_spk  # 每个speaker的验证集条数
+        self.max_val_total: int = max_val_total  # 验证集最大条数，多于的会被截断并放到训练集中
+        self.clean: bool = clean  # 是否进行数据清洗
+
+    @classmethod
+    def from_dict(cls, dataset_path: str, data: Dict[str, any]):
+        """从字典中生成实例"""
+
+        data["transcription_path"] = os.path.join(
+            dataset_path, data["transcription_path"]
+        )
+        data["cleaned_path"] = os.path.join(dataset_path, data["cleaned_path"])
+        data["train_path"] = os.path.join(dataset_path, data["train_path"])
+        data["val_path"] = os.path.join(dataset_path, data["val_path"])
+        data["config_path"] = os.path.join(dataset_path, data["config_path"])
+
+        return cls(**data)
+
+
+class Bert_gen_config:
+    """bert_gen 配置"""
+
+    def __init__(
+        self,
+        config_path: str,
+        num_processes: int = 2,
+        device: str = "cuda",
+    ):
+        self.config_path = config_path
+        self.num_processes = num_processes
+        self.device = device
+
+    @classmethod
+    def from_dict(cls, dataset_path: str, data: Dict[str, any]):
+        data["config_path"] = os.path.join(dataset_path, data["config_path"])
+
+        return cls(**data)
+
+
+class Train_ms_config:
+    """训练配置"""
+
+    def __init__(
+        self,
+        config_path: str,
+        env: Dict[str, any],
+        model: str,
+    ):
+        self.env = env  # 需要加载的环境变量
+        self.model = model  # 训练模型存储目录
+        self.config_path = config_path  # 配置文件路径
+
+    @classmethod
+    def from_dict(cls, dataset_path: str, data: Dict[str, any]):
+        data["model"] = os.path.join(dataset_path, data["model"])
+        data["config_path"] = os.path.join(dataset_path, data["config_path"])
+
+        return cls(**data)
+
+
+class Webui_config:
+    """webui 配置"""
+
+    def __init__(
+        self,
+        model: str,
+        config_path: str,
+        port: int = 7860,
+        share: bool = False,
+        debug: bool = False,
+    ):
+        self.model: str = model  # 端口号
+        self.config_path: str = config_path  # 是否公开部署，对外网开放
+        self.port: int = port  # 是否开启debug模式
+        self.share: bool = share  # 模型路径
+        self.debug: bool = debug  # 配置文件路径
+
+    @classmethod
+    def from_dict(cls, dataset_path: str, data: Dict[str, any]):
+        data["config_path"] = os.path.join(dataset_path, data["config_path"])
+        data["model"] = os.path.join(dataset_path, data["model"])
+        return cls(**data)
+
+
+class Server_config:
+    def __init__(
+        self, models: List[Dict[str, any]], port: int = 5000, device: str = "cuda"
+    ):
+        self.models: List[Dict[str, any]] = models  # 需要加载的所有模型的配置
+        self.port: int = port  # 端口号
+        self.device: str = device  # 模型默认使用设备
+
+    @classmethod
+    def from_dict(cls, data: Dict[str, any]):
+        return cls(**data)
+
+
+class Config:
+    def __init__(self, config_path: str):
+        if not os.path.isfile(config_path) and os.path.isfile("default_config.yml"):
+            shutil.copy(src="default_config.yml", dst=config_path)
+            print(f"已根据默认配置文件default_config.yml生成配置文件{config_path}")
+        with open(file=config_path, mode="r", encoding="utf-8") as file:
+            yaml_config: Dict[str, any] = yaml.safe_load(file.read())
+            dataset_path: str = yaml_config["dataset_path"]
+            self.resample_config: Resample_config = Resample_config.from_dict(
+                dataset_path, yaml_config["resample"]
+            )
+            self.preprocess_text_config: Preprocess_text_config = (
+                Preprocess_text_config.from_dict(
+                    dataset_path, yaml_config["preprocess_text"]
+                )
+            )
+            self.bert_gen_config: Bert_gen_config = Bert_gen_config.from_dict(
+                dataset_path, yaml_config["bert_gen"]
+            )
+            self.train_ms_config: Train_ms_config = Train_ms_config.from_dict(
+                dataset_path, yaml_config["train_ms"]
+            )
+            self.web_ui_config: Webui_config = Webui_config.from_dict(
+                dataset_path, yaml_config["webui"]
+            )
+            self.server_config: Server_config = Server_config.from_dict(
+                yaml_config["server"]
+            )
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument("-c", "--config", type=str, default="config.yml")
+args = parser.parse_args()
+config = Config(args.config)
+
+if __name__ == "__main__":
+    config2 = config
+    pass
diff --git a/default_config.yml b/default_config.yml
@@ -0,0 +1,133 @@
+# 全局配置
+# 对于希望在同一时间使用多个配置文件的情况，例如两个GPU同时跑两个训练集：通过命令行参数 -c/--config 指定配置文件，不指定则默认为./config.yml
+
+# 拟提供通用路径配置，统一存放数据，避免数据放得很乱
+# 每个数据集与其对应的模型存放至统一路径下，后续的路径配置（server 部分除外）均为相对于 dataset_path 的路径
+# 不填或者填空则路径为相对于项目根目录的路径
+dataset_path: "Data/你的数据集"
+
+
+# resample 音频重采样配置
+# 注意， “:” 后需要加空格
+resample:
+  # 目标重采样率
+  sampling_rate: 44100
+  # 音频文件输入路径，重采样会将该路径下所有.wav音频文件重采样
+  # 请填入相对于 dataset_path 的相对路径
+  in_dir: "audios/raw" # 相对于根目录的路径为 {dataset_path}/{in_dir}
+  # 音频文件重采样后输出路径
+  out_dir: "audios/wavs"
+
+
+# preprocess_text 数据集预处理相关配置
+# 注意， “:” 后需要加空格
+preprocess_text:
+  # 原始文本文件路径，文本格式应为{wav_path}|{speaker_name}|{language}|{text}。
+  transcription_path: "filelists/你的数据集文本.list"
+  # 数据清洗后文本路径，可以不填。不填则将在原始文本目录生成
+  cleaned_path: ""
+  # 训练集路径，可以不填。不填则将在原始文本目录生成
+  train_path: ""
+  # 验证集路径，可以不填。不填则将在原始文本目录生成
+  val_path: ""
+  # 配置文件路径
+  config_path: "config.json"
+  # 每个speaker的验证集条数
+  val_per_spk: 5
+  # 验证集最大条数，多于的会被截断并放到训练集中
+  max_val_total: 10000
+  # 是否进行数据清洗
+  clean: true
+
+
+# bert_gen 相关配置
+# 注意， “:” 后需要加空格
+bert_gen:
+  # 训练数据集配置文件路径
+  config_path: "config.json"
+  # 并行数
+  num_processes: 2
+  # 使用设备：可选项 "cuda" 显卡推理， "cpu" cpu推理
+  # 此配置会影响所有使用bert的任务，包括bert_gen、train_ms、web_ui、api
+  device: "cuda"
+
+
+# train 训练配置
+# 注意， “:” 后需要加空格
+train_ms:
+  # 需要加载的环境变量，多显卡训练，RANK推荐手动填写
+  env:
+    MASTER_ADDR: "localhost"
+    MASTER_PORT: 10086
+    WORLD_SIZE: 1
+    RANK: 0
+    # 可以填写任意名的环境变量
+    THE_ENV_VAR_YOU_NEED_TO_USE: "1234567"
+  # 训练模型存储目录：与旧版本的区别，原先数据集是存放在logs/model_name下的，现在改为统一存放在Data/你的数据集/models下
+  model: "models"
+  # 配置文件路径
+  config_path: "config.json"
+
+
+# webui webui配置
+# 注意， “:” 后需要加空格
+webui:
+  # 端口号
+  port: 7860
+  # 是否公开部署，对外网开放
+  share: false
+  # 是否开启debug模式
+  debug: false
+  # 模型路径
+  model: "models/G_8000.pth"
+  # 配置文件路径
+  config_path: "config.json"
+
+
+# server api配置
+# 注意， “:” 后需要加空格
+# 注意，本配置下的所有配置均为相对于根目录的路径
+server:
+  # 端口号
+  port: 5000
+  # 模型默认使用设备
+  device: "cuda"
+  # 需要加载的所有模型的配置
+  models:
+    - # 模型的路径
+      model: ""
+      # 模型config.json的路径
+      config: ""
+      # 模型使用设备，若填写则会覆盖默认配置
+      device: "cuda"
+      # 模型默认使用的语言
+      language: "ZH"
+      # 模型人物默认参数
+      # 不必填写所有人物，不填的使用默认值
+      speakers:
+        - speaker: "科比"
+          sdp_ratio: 0.2
+          noise_scale: 0.6
+          noise_scale_w: 0.8
+          length_scale: 1
+        - speaker: "五条悟"
+          sdp_ratio: 0.3
+          noise_scale: 0.7
+          noise_scale_w: 0.8
+          length_scale: 0.5
+        - speaker: "安倍晋三"
+          sdp_ratio: 0.2
+          noise_scale: 0.6
+          noise_scale_w: 0.8
+          length_scale: 1.2
+    - # 模型的路径
+      model: ""
+      # 模型config.json的路径
+      config: ""
+      # 模型使用设备，若填写则会覆盖默认配置
+      device: "cpu"
+      # 模型默认使用的语言
+      language: "JP"
+      # 模型人物默认参数
+      # 不必填写所有人物，不填的使用默认值
+      speakers: [ ] # 也可以不填
diff --git a/requirements.txt b/requirements.txt
@@ -21,3 +21,4 @@ unidic-lite
 cmudict
 fugashi
 num2words
+PyYAML
diff --git a/webui.py b/webui.py
@@ -48,6 +48,9 @@ def tts_fn(
                 length_scale=length_scale,
                 sid=speaker,
                 language=language,
+                hps=hps,
+                net_g=net_g,
+                device=device,
             )
             audio_list.append(audio)
             silence = np.zeros(hps.data.sampling_rate)  # 生成1秒的静音
-Original file line number
+Diff line change
@@ Expand Up / @@ -21,3 +21,4 @@ unidic-lite @@
     cmudict
     fugashi
     num2words
+    PyYAML