Skip to content

Commit

Permalink
Refactor: 前処理のApply/Convert/Rescale分類
Browse files: browse the repository at this point in the history
  • Loading branch information
tarepan committed Dec 6, 2023
1 parent 9746abd commit 1c108f3
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 6 deletions.
6 changes: 3 additions & 3 deletions test/test_synthesis_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,8 @@

# TODO: import from voicevox_engine.synthesis_engine.mora
from voicevox_engine.synthesis_engine.synthesis_engine import (
apply_pitch,
apply_intonation,
apply_pitch,
apply_volume,
calc_frame_per_phoneme,
calc_frame_phoneme,
Expand Down Expand Up @@ -252,10 +252,10 @@ def test_apply_volume():
"""Test `apply_volume`."""
# Inputs
query = _gen_query(volumeScale=3.0)
input_wave = numpy.array([0.0, 1.0, 2.0, 0.0,])
input_wave = numpy.array([0.0, 1.0, 2.0])

# Expects - x3 scale
true_wave = numpy.array([0.0, 3.0, 6.0, 0.0,])
true_wave = numpy.array([0.0, 3.0, 6.0])

# Outputs
wave = apply_volume(input_wave, query)
Expand Down
9 changes: 6 additions & 3 deletions voicevox_engine/synthesis_engine/synthesis_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -241,17 +241,17 @@ def calc_frame_pitch(
moras = apply_pitch(moras, query)
moras = apply_intonation(moras, query)

# Convert: Core入力形式への変換(スカラ系列)
# TODO: Better function name (c.f. VOICEVOX/voicevox_engine#790)
# モーラごとの基本周波数
f0 = numpy.array([mora.pitch for mora in moras], dtype=numpy.float32)

# フレームごとのピッチ化
# Rescale: 時間スケールの変更(モーラ -> フレーム)
# 母音インデックスに基づき "音素あたりのフレーム長" を "モーラあたりのフレーム長" に集約
vowel_indexes = numpy.array(split_mora(phonemes)[2])
frame_per_mora = [
a.sum() for a in numpy.split(frame_per_phoneme, vowel_indexes[:-1] + 1)
]
# モーラの基本周波数を子音・母音に割当てフレーム化
frame_f0 = numpy.repeat(f0, frame_per_mora)
return frame_f0

Expand Down Expand Up @@ -289,7 +289,10 @@ def calc_frame_phoneme(phonemes: List[OjtPhoneme], frame_per_phoneme: numpy.ndar
フレームごとの音素系列
"""
# TODO: Better function name (c.f. VOICEVOX/voicevox_engine#790)
# Convert: Core入力形式への変換(onehotベクトル系列)
onehot_phoneme = numpy.stack([p.onehot for p in phonemes])

# Rescale: 時間スケールの変更(音素 -> フレーム)
frame_phoneme = numpy.repeat(onehot_phoneme, frame_per_phoneme, axis=0)
return frame_phoneme

Expand Down Expand Up @@ -568,7 +571,7 @@ def _synthesis_impl(self, query: AudioQuery, style_id: int):
phoneme=phoneme,
style_id=numpy.array(style_id, dtype=numpy.int64).reshape(-1),
)

# Apply: グローバル特徴量による補正(音量)
wave = apply_volume(wave, query)

Expand Down

0 comments on commit 1c108f3

Please sign in to comment.