From 349bf58cc6d79b2bc95925dcf1da0b87eeeea32f Mon Sep 17 00:00:00 2001 From: tarepan Date: Tue, 5 Dec 2023 23:31:40 +0900 Subject: [PATCH] =?UTF-8?q?=E8=BF=BD=E5=8A=A0:=20`OjtPhoneme.onehot()`=20f?= =?UTF-8?q?loat=20=E5=87=BA=E5=8A=9B=20(#810)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/test_acoustic_feature_extractor.py | 4 ++-- voicevox_engine/acoustic_feature_extractor.py | 10 +++++----- .../synthesis_engine/synthesis_engine.py | 17 +++-------------- 3 files changed, 10 insertions(+), 21 deletions(-) diff --git a/test/test_acoustic_feature_extractor.py b/test/test_acoustic_feature_extractor.py index cf1c7d9f7..0a1a16ae3 100644 --- a/test/test_acoustic_feature_extractor.py +++ b/test/test_acoustic_feature_extractor.py @@ -84,6 +84,6 @@ def test_onehot(self): for i, phoneme in enumerate(self.ojt_hello_hiho): for j in range(OjtPhoneme.num_phoneme): if phoneme_id_list[i] == j: - self.assertEqual(phoneme.onehot[j], True) + self.assertEqual(phoneme.onehot[j], 1.0) else: - self.assertEqual(phoneme.onehot[j], False) + self.assertEqual(phoneme.onehot[j], 0.0) diff --git a/voicevox_engine/acoustic_feature_extractor.py b/voicevox_engine/acoustic_feature_extractor.py index 111a4136f..f579e6b41 100644 --- a/voicevox_engine/acoustic_feature_extractor.py +++ b/voicevox_engine/acoustic_feature_extractor.py @@ -100,12 +100,12 @@ def phoneme_id(self): @property def onehot(self): """ - phoneme listの長さ分の0埋め配列のうち、phoneme id番目がTrue(1)の配列を返す + 音素onehotベクトル Returns ------- - onehot : numpu.ndarray - 関数内で変更された配列を返す + onehot : numpy.ndarray + 音素onehotベクトル(listの長さ分の0埋め配列のうち、phoneme id番目が1.0の配列) """ - array = numpy.zeros(self.num_phoneme, dtype=bool) - array[self.phoneme_id] = True + array = numpy.zeros(self.num_phoneme, dtype=numpy.float32) + array[self.phoneme_id] = 1.0 return array diff --git a/voicevox_engine/synthesis_engine/synthesis_engine.py b/voicevox_engine/synthesis_engine/synthesis_engine.py index 850c906a9..7fd564ba7 100644 --- a/voicevox_engine/synthesis_engine/synthesis_engine.py +++ b/voicevox_engine/synthesis_engine/synthesis_engine.py @@ -198,7 +198,7 @@ def calc_frame_pitch( def calc_frame_phoneme(phonemes: List[OjtPhoneme], frame_per_phoneme: numpy.ndarray): """ - フレームごとの音素列の生成 + フレームごとの音素列の生成(onehot化 + フレーム化) Parameters ---------- phonemes : List[OjtPhoneme] @@ -211,19 +211,8 @@ def calc_frame_phoneme(phonemes: List[OjtPhoneme], frame_per_phoneme: numpy.ndar フレームごとの音素系列 """ # TODO: Better function name (c.f. VOICEVOX/voicevox_engine#790) - # Index化 - phoneme_ids = numpy.array([p.phoneme_id for p in phonemes], dtype=numpy.int64) - - # フレームごとの音素化 - frame_phoneme = numpy.repeat(phoneme_ids, frame_per_phoneme) - - # Onehot化 - array = numpy.zeros( - (len(frame_phoneme), OjtPhoneme.num_phoneme), dtype=numpy.float32 - ) - array[numpy.arange(len(frame_phoneme)), frame_phoneme] = 1 - frame_phoneme = array - + onehot_phoneme = numpy.stack([p.onehot for p in phonemes]) + frame_phoneme = numpy.repeat(onehot_phoneme, frame_per_phoneme, axis=0) return frame_phoneme