Skip to content

Commit

Permalink
追加: OjtPhoneme.onehot() float 出力 (#810)
Browse files Browse the repository at this point in the history
  • Loading branch information
tarepan authored Dec 5, 2023
1 parent 16845ab commit 349bf58
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 21 deletions.
4 changes: 2 additions & 2 deletions test/test_acoustic_feature_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,6 @@ def test_onehot(self):
for i, phoneme in enumerate(self.ojt_hello_hiho):
for j in range(OjtPhoneme.num_phoneme):
if phoneme_id_list[i] == j:
self.assertEqual(phoneme.onehot[j], True)
self.assertEqual(phoneme.onehot[j], 1.0)
else:
self.assertEqual(phoneme.onehot[j], False)
self.assertEqual(phoneme.onehot[j], 0.0)
10 changes: 5 additions & 5 deletions voicevox_engine/acoustic_feature_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,12 +100,12 @@ def phoneme_id(self):
@property
def onehot(self):
"""
phoneme listの長さ分の0埋め配列のうち、phoneme id番目がTrue(1)の配列を返す
音素onehotベクトル
Returns
-------
onehot : numpu.ndarray
関数内で変更された配列を返す
onehot : numpy.ndarray
音素onehotベクトル(listの長さ分の0埋め配列のうち、phoneme id番目が1.0の配列)
"""
array = numpy.zeros(self.num_phoneme, dtype=bool)
array[self.phoneme_id] = True
array = numpy.zeros(self.num_phoneme, dtype=numpy.float32)
array[self.phoneme_id] = 1.0
return array
17 changes: 3 additions & 14 deletions voicevox_engine/synthesis_engine/synthesis_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -198,7 +198,7 @@ def calc_frame_pitch(

def calc_frame_phoneme(phonemes: List[OjtPhoneme], frame_per_phoneme: numpy.ndarray):
"""
フレームごとの音素列の生成
フレームごとの音素列の生成(onehot化 + フレーム化)
Parameters
----------
phonemes : List[OjtPhoneme]
Expand All @@ -211,19 +211,8 @@ def calc_frame_phoneme(phonemes: List[OjtPhoneme], frame_per_phoneme: numpy.ndar
フレームごとの音素系列
"""
# TODO: Better function name (c.f. VOICEVOX/voicevox_engine#790)
# Index化
phoneme_ids = numpy.array([p.phoneme_id for p in phonemes], dtype=numpy.int64)

# フレームごとの音素化
frame_phoneme = numpy.repeat(phoneme_ids, frame_per_phoneme)

# Onehot化
array = numpy.zeros(
(len(frame_phoneme), OjtPhoneme.num_phoneme), dtype=numpy.float32
)
array[numpy.arange(len(frame_phoneme)), frame_phoneme] = 1
frame_phoneme = array

onehot_phoneme = numpy.stack([p.onehot for p in phonemes])
frame_phoneme = numpy.repeat(onehot_phoneme, frame_per_phoneme, axis=0)
return frame_phoneme


Expand Down

0 comments on commit 349bf58

Please sign in to comment.