diff --git a/test/test_synthesis_engine.py b/test/test_synthesis_engine.py
index 3c38830d0..a5536bb15 100644
--- a/test/test_synthesis_engine.py
+++ b/test/test_synthesis_engine.py
@@ -13,8 +13,8 @@
 
 # TODO: import from voicevox_engine.synthesis_engine.mora
 from voicevox_engine.synthesis_engine.synthesis_engine import (
-    apply_pitch,
     apply_intonation,
+    apply_pitch,
     apply_volume,
     calc_frame_per_phoneme,
     calc_frame_phoneme,
@@ -230,10 +230,10 @@ def test_apply_volume():
     """Test `apply_volume`."""
     # Inputs
     query = _gen_query(volumeScale=3.0)
-    input_wave = numpy.array([0.0, 1.0, 2.0, 0.0,])
+    input_wave = numpy.array([0.0, 1.0, 2.0])
 
     # Expects - x3 scale
-    true_wave = numpy.array([0.0, 3.0, 6.0, 0.0,])
+    true_wave = numpy.array([0.0, 3.0, 6.0])
 
     # Outputs
     wave = apply_volume(input_wave, query)
diff --git a/voicevox_engine/synthesis_engine/synthesis_engine.py b/voicevox_engine/synthesis_engine/synthesis_engine.py
index 07def1048..9bb139ae0 100644
--- a/voicevox_engine/synthesis_engine/synthesis_engine.py
+++ b/voicevox_engine/synthesis_engine/synthesis_engine.py
@@ -219,17 +219,17 @@ def calc_frame_pitch(
     moras = apply_pitch(moras, query)
     moras = apply_intonation(moras, query)
 
+    # Convert: Core入力形式への変換（スカラ系列）
     # TODO: Better function name (c.f. VOICEVOX/voicevox_engine#790)
     # モーラ（前後の無音含む）ごとの基本周波数
     f0 = numpy.array([0] + [mora.pitch for mora in moras] + [0], dtype=numpy.float32)
 
-    # フレームごとのピッチ化
+    # Rescale: 時間スケールの変更（モーラ -> フレーム）
     # 母音インデックスに基づき "音素あたりのフレーム長" を "モーラあたりのフレーム長" に集約
     vowel_indexes = numpy.array(split_mora(phonemes)[2])
     frame_per_mora = [
         a.sum() for a in numpy.split(frame_per_phoneme, vowel_indexes[:-1] + 1)
     ]
-    # モーラの基本周波数を子音・母音に割当てフレーム化
     frame_f0 = numpy.repeat(f0, frame_per_mora)
     return frame_f0
 
@@ -267,7 +267,10 @@ def calc_frame_phoneme(phonemes: List[OjtPhoneme], frame_per_phoneme: numpy.ndar
         フレームごとの音素系列
     """
     # TODO: Better function name (c.f. VOICEVOX/voicevox_engine#790)
+    # Convert: Core入力形式への変換（onehotベクトル系列）
     onehot_phoneme = numpy.stack([p.onehot for p in phonemes])
+
+    # Rescale: 時間スケールの変更（音素 -> フレーム）
     frame_phoneme = numpy.repeat(onehot_phoneme, frame_per_phoneme, axis=0)
     return frame_phoneme
 
@@ -545,7 +548,7 @@ def _synthesis_impl(self, query: AudioQuery, style_id: int):
                 phoneme=phoneme,
                 style_id=numpy.array(style_id, dtype=numpy.int64).reshape(-1),
             )
- 
+
         # Apply: グローバル特徴量による補正（音量）
         wave = apply_volume(wave, query)