整理: ユーザー辞書のdocstring・変数名・型・コメント (#836)

tarepan · web-flow · commit 6f5c384d9055 · 2023-12-13T12:57:27.000+09:00
diff --git a/test/test_user_dict.py b/test/test_user_dict.py
@@ -11,8 +11,8 @@
 from voicevox_engine.model import UserDictWord, WordTypes
 from voicevox_engine.part_of_speech_data import MAX_PRIORITY, part_of_speech_data
 from voicevox_engine.user_dict import (
+    _create_word,
     apply_word,
-    create_word,
     delete_word,
     import_user_dict,
     read_dict,
@@ -90,7 +90,7 @@ def test_read_not_exist_json(self):
     def test_create_word(self):
         # 将来的に品詞などが追加された時にテストを増やす
         self.assertEqual(
-            create_word(surface="test", pronunciation="テスト", accent_type=1),
+            _create_word(surface="test", pronunciation="テスト", accent_type=1),
             UserDictWord(
                 surface="ｔｅｓｔ",
                 priority=5,
@@ -219,7 +219,7 @@ def test_priority(self):
         for pos in part_of_speech_data:
             for i in range(MAX_PRIORITY + 1):
                 self.assertEqual(
-                    create_word(
+                    _create_word(
                         surface="test",
                         pronunciation="テスト",
                         accent_type=1,
diff --git a/voicevox_engine/user_dict.py b/voicevox_engine/user_dict.py
@@ -21,27 +21,39 @@
 if not save_dir.is_dir():
     save_dir.mkdir(parents=True)
 
-default_dict_path = root_dir / "default.csv"
-user_dict_path = save_dir / "user_dict.json"
-compiled_dict_path = save_dir / "user.dic"
+default_dict_path = root_dir / "default.csv"  # VOICEVOXデフォルト辞書ファイルのパス
+user_dict_path = save_dir / "user_dict.json"  # ユーザー辞書ファイルのパス
+compiled_dict_path = save_dir / "user.dic"  # コンパイル済み辞書ファイルのパス
 
 
+# 同時書き込みの制御
 mutex_user_dict = threading.Lock()
 mutex_openjtalk_dict = threading.Lock()
 
 
 @mutex_wrapper(mutex_user_dict)
-def write_to_json(user_dict: Dict[str, UserDictWord], user_dict_path: Path):
+def _write_to_json(user_dict: Dict[str, UserDictWord], user_dict_path: Path) -> None:
+    """
+    ユーザー辞書ファイルへのユーザー辞書データ書き込み
+    Parameters
+    ----------
+    user_dict : Dict[str, UserDictWord]
+        ユーザー辞書データ
+    user_dict_path : Path
+        ユーザー辞書ファイルのパス
+    """
     converted_user_dict = {}
     for word_uuid, word in user_dict.items():
         word_dict = word.dict()
-        word_dict["cost"] = priority2cost(
+        word_dict["cost"] = _priority2cost(
             word_dict["context_id"], word_dict["priority"]
         )
         del word_dict["priority"]
         converted_user_dict[word_uuid] = word_dict
     # 予めjsonに変換できることを確かめる
     user_dict_json = json.dumps(converted_user_dict, ensure_ascii=False)
+
+    # ユーザー辞書ファイルへの書き込み
     user_dict_path.write_text(user_dict_json, encoding="utf-8")
 
 
@@ -50,21 +62,38 @@ def update_dict(
     default_dict_path: Path = default_dict_path,
     user_dict_path: Path = user_dict_path,
     compiled_dict_path: Path = compiled_dict_path,
-):
+) -> None:
+    """
+    辞書の更新
+    Parameters
+    ----------
+    default_dict_path : Path
+        デフォルト辞書ファイルのパス
+    user_dict_path : Path
+        ユーザー辞書ファイルのパス
+    compiled_dict_path : Path
+        コンパイル済み辞書ファイルのパス
+    """
     random_string = uuid4()
-    tmp_csv_path = save_dir / f".tmp.dict_csv-{random_string}"
-    tmp_compiled_path = save_dir / f".tmp.dict_compiled-{random_string}"
+    tmp_csv_path = save_dir / f".tmp.dict_csv-{random_string}"  # csv形式辞書データの一時保存ファイル
+    tmp_compiled_path = (
+        save_dir / f".tmp.dict_compiled-{random_string}"
+    )  # コンパイル済み辞書データの一時保存ファイル
 
     try:
         # 辞書.csvを作成
         csv_text = ""
+
+        # デフォルト辞書データの追加
         if not default_dict_path.is_file():
             print("Warning: Cannot find default dictionary.", file=sys.stderr)
             return
         default_dict = default_dict_path.read_text(encoding="utf-8")
         if default_dict == default_dict.rstrip():
             default_dict += "\n"
         csv_text += default_dict
+
+        # ユーザー辞書データの追加
         user_dict = read_dict(user_dict_path=user_dict_path)
         for word_uuid in user_dict:
             word = user_dict[word_uuid]
@@ -77,7 +106,7 @@ def update_dict(
             ).format(
                 surface=word.surface,
                 context_id=word.context_id,
-                cost=priority2cost(word.context_id, word.priority),
+                cost=_priority2cost(word.context_id, word.priority),
                 part_of_speech=word.part_of_speech,
                 part_of_speech_detail_1=word.part_of_speech_detail_1,
                 part_of_speech_detail_2=word.part_of_speech_detail_2,
@@ -91,6 +120,7 @@ def update_dict(
                 mora_count=word.mora_count,
                 accent_associative_rule=word.accent_associative_rule,
             )
+        # 辞書データを辞書.csv へ一時保存
         tmp_csv_path.write_text(csv_text, encoding="utf-8")
 
         # 辞書.csvをOpenJTalk用にコンパイル
@@ -119,10 +149,23 @@ def update_dict(
 
 @mutex_wrapper(mutex_user_dict)
 def read_dict(user_dict_path: Path = user_dict_path) -> Dict[str, UserDictWord]:
+    """
+    ユーザー辞書の読み出し
+    Parameters
+    ----------
+    user_dict_path : Path
+        ユーザー辞書ファイルのパス
+    Returns
+    -------
+    result : Dict[str, UserDictWord]
+        ユーザー辞書
+    """
+    # 指定ユーザー辞書が存在しない場合、空辞書を返す
     if not user_dict_path.is_file():
         return {}
+
     with user_dict_path.open(encoding="utf-8") as f:
-        result = {}
+        result: Dict[str, UserDictWord] = {}
         for word_uuid, word in json.load(f).items():
             # cost2priorityで変換を行う際にcontext_idが必要となるが、
             # 0.12以前の辞書は、context_idがハードコーディングされていたためにユーザー辞書内に保管されていない
@@ -131,20 +174,39 @@ def read_dict(user_dict_path: Path = user_dict_path) -> Dict[str, UserDictWord]:
                 word["context_id"] = part_of_speech_data[
                     WordTypes.PROPER_NOUN
                 ].context_id
-            word["priority"] = cost2priority(word["context_id"], word["cost"])
+            word["priority"] = _cost2priority(word["context_id"], word["cost"])
             del word["cost"]
             result[str(UUID(word_uuid))] = UserDictWord(**word)
 
     return result
 
 
-def create_word(
+def _create_word(
     surface: str,
     pronunciation: str,
     accent_type: int,
     word_type: Optional[WordTypes] = None,
     priority: Optional[int] = None,
 ) -> UserDictWord:
+    """
+    単語オブジェクトの生成
+    Parameters
+    ----------
+    surface : str
+        単語情報
+    pronunciation : str
+        単語情報
+    accent_type : int
+        単語情報
+    word_type : Optional[WordTypes]
+        品詞
+    priority : Optional[int]
+        優先度
+    Returns
+    -------
+    UserDictWord
+        単語オブジェクト
+    """
     if word_type is None:
         word_type = WordTypes.PROPER_NOUN
     if word_type not in part_of_speech_data.keys():
@@ -181,7 +243,31 @@ def apply_word(
     user_dict_path: Path = user_dict_path,
     compiled_dict_path: Path = compiled_dict_path,
 ) -> str:
-    word = create_word(
+    """
+    新規単語の追加
+    Parameters
+    ----------
+    surface : str
+        単語情報
+    pronunciation : str
+        単語情報
+    accent_type : int
+        単語情報
+    word_type : Optional[WordTypes]
+        品詞
+    priority : Optional[int]
+        優先度
+    user_dict_path : Path
+        ユーザー辞書ファイルのパス
+    compiled_dict_path : Path
+        コンパイル済み辞書ファイルのパス
+    Returns
+    -------
+    word_uuid : str
+        追加された単語に発行されたUUID
+    """
+    # 新規単語の追加による辞書データの更新
+    word = _create_word(
         surface=surface,
         pronunciation=pronunciation,
         accent_type=accent_type,
@@ -191,8 +277,11 @@ def apply_word(
     user_dict = read_dict(user_dict_path=user_dict_path)
     word_uuid = str(uuid4())
     user_dict[word_uuid] = word
-    write_to_json(user_dict, user_dict_path)
+
+    # 更新された辞書データの保存と適用
+    _write_to_json(user_dict, user_dict_path)
     update_dict(user_dict_path=user_dict_path, compiled_dict_path=compiled_dict_path)
+
     return word_uuid
 
 
@@ -205,32 +294,71 @@ def rewrite_word(
     priority: Optional[int] = None,
     user_dict_path: Path = user_dict_path,
     compiled_dict_path: Path = compiled_dict_path,
-):
-    word = create_word(
+) -> None:
+    """
+    既存単語の上書き更新
+    Parameters
+    ----------
+    word_uuid : str
+        単語UUID
+    surface : str
+        単語情報
+    pronunciation : str
+        単語情報
+    accent_type : int
+        単語情報
+    word_type : Optional[WordTypes]
+        品詞
+    priority : Optional[int]
+        優先度
+    user_dict_path : Path
+        ユーザー辞書ファイルのパス
+    compiled_dict_path : Path
+        コンパイル済み辞書ファイルのパス
+    """
+    word = _create_word(
         surface=surface,
         pronunciation=pronunciation,
         accent_type=accent_type,
         word_type=word_type,
         priority=priority,
     )
+
+    # 既存単語の上書きによる辞書データの更新
     user_dict = read_dict(user_dict_path=user_dict_path)
     if word_uuid not in user_dict:
         raise HTTPException(status_code=422, detail="UUIDに該当するワードが見つかりませんでした")
     user_dict[word_uuid] = word
-    write_to_json(user_dict, user_dict_path)
+
+    # 更新された辞書データの保存と適用
+    _write_to_json(user_dict, user_dict_path)
     update_dict(user_dict_path=user_dict_path, compiled_dict_path=compiled_dict_path)
 
 
 def delete_word(
     word_uuid: str,
     user_dict_path: Path = user_dict_path,
     compiled_dict_path: Path = compiled_dict_path,
-):
+) -> None:
+    """
+    単語の削除
+    Parameters
+    ----------
+    word_uuid : str
+        単語UUID
+    user_dict_path : Path
+        ユーザー辞書ファイルのパス
+    compiled_dict_path : Path
+        コンパイル済み辞書ファイルのパス
+    """
+    # 既存単語の削除による辞書データの更新
     user_dict = read_dict(user_dict_path=user_dict_path)
     if word_uuid not in user_dict:
         raise HTTPException(status_code=422, detail="IDに該当するワードが見つかりませんでした")
     del user_dict[word_uuid]
-    write_to_json(user_dict, user_dict_path)
+
+    # 更新された辞書データの保存と適用
+    _write_to_json(user_dict, user_dict_path)
     update_dict(user_dict_path=user_dict_path, compiled_dict_path=compiled_dict_path)
 
 
@@ -240,8 +368,23 @@ def import_user_dict(
     user_dict_path: Path = user_dict_path,
     default_dict_path: Path = default_dict_path,
     compiled_dict_path: Path = compiled_dict_path,
-):
-    # 念のため型チェックを行う
+) -> None:
+    """
+    ユーザー辞書のインポート
+    Parameters
+    ----------
+    dict_data : Dict[str, UserDictWord]
+        インポートするユーザー辞書のデータ
+    override : bool
+        重複したエントリがあった場合、上書きするかどうか
+    user_dict_path : Path
+        ユーザー辞書ファイルのパス
+    default_dict_path : Path
+        デフォルト辞書ファイルのパス
+    compiled_dict_path : Path
+        コンパイル済み辞書ファイルのパス
+    """
+    # インポートする辞書データのバリデーション
     for word_uuid, word in dict_data.items():
         UUID(word_uuid)
         assert isinstance(word, UserDictWord)
@@ -263,36 +406,44 @@ def import_user_dict(
                 break
         else:
             raise ValueError("対応していない品詞です")
+
+    # 既存辞書の読み出し
     old_dict = read_dict(user_dict_path=user_dict_path)
+
+    # 辞書データの更新
+    # 重複エントリの上書き
     if override:
         new_dict = {**old_dict, **dict_data}
+    # 重複エントリの保持
     else:
         new_dict = {**dict_data, **old_dict}
-    write_to_json(user_dict=new_dict, user_dict_path=user_dict_path)
+
+    # 更新された辞書データの保存と適用
+    _write_to_json(user_dict=new_dict, user_dict_path=user_dict_path)
     update_dict(
         default_dict_path=default_dict_path,
         user_dict_path=user_dict_path,
         compiled_dict_path=compiled_dict_path,
     )
 
 
-def search_cost_candidates(context_id: int) -> List[int]:
+def _search_cost_candidates(context_id: int) -> List[int]:
     for value in part_of_speech_data.values():
         if value.context_id == context_id:
             return value.cost_candidates
     raise HTTPException(status_code=422, detail="品詞IDが不正です")
 
 
-def cost2priority(context_id: int, cost: conint(ge=-32768, le=32767)) -> int:
-    cost_candidates = search_cost_candidates(context_id)
+def _cost2priority(context_id: int, cost: conint(ge=-32768, le=32767)) -> int:
+    cost_candidates = _search_cost_candidates(context_id)
     # cost_candidatesの中にある値で最も近い値を元にpriorityを返す
     # 参考: https://qiita.com/Krypf/items/2eada91c37161d17621d
     # この関数とpriority2cost関数によって、辞書ファイルのcostを操作しても最も近いpriorityのcostに上書きされる
     return MAX_PRIORITY - np.argmin(np.abs(np.array(cost_candidates) - cost))
 
 
-def priority2cost(
+def _priority2cost(
     context_id: int, priority: conint(ge=MIN_PRIORITY, le=MAX_PRIORITY)
 ) -> int:
-    cost_candidates = search_cost_candidates(context_id)
+    cost_candidates = _search_cost_candidates(context_id)
     return cost_candidates[MAX_PRIORITY - priority]