From 349bf58cc6d79b2bc95925dcf1da0b87eeeea32f Mon Sep 17 00:00:00 2001
From: tarepan <tarepan5884@gmail.com>
Date: Tue, 5 Dec 2023 23:31:40 +0900
Subject: [PATCH] =?UTF-8?q?=E8=BF=BD=E5=8A=A0:=20`OjtPhoneme.onehot()`=20f?=
 =?UTF-8?q?loat=20=E5=87=BA=E5=8A=9B=20(#810)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 test/test_acoustic_feature_extractor.py         |  4 ++--
 voicevox_engine/acoustic_feature_extractor.py   | 10 +++++-----
 .../synthesis_engine/synthesis_engine.py        | 17 +++--------------
 3 files changed, 10 insertions(+), 21 deletions(-)

diff --git a/test/test_acoustic_feature_extractor.py b/test/test_acoustic_feature_extractor.py
index cf1c7d9f7..0a1a16ae3 100644
--- a/test/test_acoustic_feature_extractor.py
+++ b/test/test_acoustic_feature_extractor.py
@@ -84,6 +84,6 @@ def test_onehot(self):
         for i, phoneme in enumerate(self.ojt_hello_hiho):
             for j in range(OjtPhoneme.num_phoneme):
                 if phoneme_id_list[i] == j:
-                    self.assertEqual(phoneme.onehot[j], True)
+                    self.assertEqual(phoneme.onehot[j], 1.0)
                 else:
-                    self.assertEqual(phoneme.onehot[j], False)
+                    self.assertEqual(phoneme.onehot[j], 0.0)
diff --git a/voicevox_engine/acoustic_feature_extractor.py b/voicevox_engine/acoustic_feature_extractor.py
index 111a4136f..f579e6b41 100644
--- a/voicevox_engine/acoustic_feature_extractor.py
+++ b/voicevox_engine/acoustic_feature_extractor.py
@@ -100,12 +100,12 @@ def phoneme_id(self):
     @property
     def onehot(self):
         """
-        phoneme listの長さ分の0埋め配列のうち、phoneme id番目がTrue(1)の配列を返す
+        One-hot vector for this phoneme
         Returns
         -------
-        onehot : numpu.ndarray
-            関数内で変更された配列を返す
+        onehot : numpy.ndarray
+            float32 array of length num_phoneme: 0.0 everywhere, 1.0 at phoneme_id
         """
-        array = numpy.zeros(self.num_phoneme, dtype=bool)
-        array[self.phoneme_id] = True
+        array = numpy.zeros(self.num_phoneme, dtype=numpy.float32)
+        array[self.phoneme_id] = 1.0
         return array
diff --git a/voicevox_engine/synthesis_engine/synthesis_engine.py b/voicevox_engine/synthesis_engine/synthesis_engine.py
index 850c906a9..7fd564ba7 100644
--- a/voicevox_engine/synthesis_engine/synthesis_engine.py
+++ b/voicevox_engine/synthesis_engine/synthesis_engine.py
@@ -198,7 +198,7 @@ def calc_frame_pitch(
 
 def calc_frame_phoneme(phonemes: List[OjtPhoneme], frame_per_phoneme: numpy.ndarray):
     """
-    フレームごとの音素列の生成
+    Build the per-frame phoneme sequence (one-hot encode, then repeat per frame)
     Parameters
     ----------
     phonemes : List[OjtPhoneme]
@@ -211,19 +211,8 @@ def calc_frame_phoneme(phonemes: List[OjtPhoneme], frame_per_phoneme: numpy.ndar
         フレームごとの音素系列
     """
     # TODO: Better function name (c.f. VOICEVOX/voicevox_engine#790)
-    # Index化
-    phoneme_ids = numpy.array([p.phoneme_id for p in phonemes], dtype=numpy.int64)
-
-    # フレームごとの音素化
-    frame_phoneme = numpy.repeat(phoneme_ids, frame_per_phoneme)
-
-    # Onehot化
-    array = numpy.zeros(
-        (len(frame_phoneme), OjtPhoneme.num_phoneme), dtype=numpy.float32
-    )
-    array[numpy.arange(len(frame_phoneme)), frame_phoneme] = 1
-    frame_phoneme = array
-
+    onehot_phoneme = numpy.stack([p.onehot for p in phonemes])
+    frame_phoneme = numpy.repeat(onehot_phoneme, frame_per_phoneme, axis=0)
     return frame_phoneme