Skip to content

Commit

Permalink
Refactor: apply_speed の切り出し
Browse files Browse the repository at this point in the history
  • Loading branch information
tarepan committed Dec 6, 2023
1 parent 1c108f3 commit a8603d4
Show file tree
Hide file tree
Showing 2 changed files with 52 additions and 4 deletions.
28 changes: 28 additions & 0 deletions test/test_synthesis_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from voicevox_engine.synthesis_engine.synthesis_engine import (
apply_intonation,
apply_pitch,
apply_speed,
apply_volume,
calc_frame_per_phoneme,
calc_frame_phoneme,
Expand Down Expand Up @@ -194,6 +195,33 @@ def test_pad_with_silence():
assert moras_with_silence == true_moras_with_silence


def test_apply_speed():
    """Verify that `apply_speed` with `speedScale=2.0` halves every phoneme length."""
    # Base duration used to build all consonant/vowel lengths below
    unit_sec = 0.01067

    # Inputs
    double_speed_query = _gen_query(speedScale=2.0)
    source_moras = [
        _gen_mora("コ", "k", 2 * unit_sec, "o", 4 * unit_sec, 50.0),
        _gen_mora("ン", None, None, "N", 4 * unit_sec, 50.0),
        _gen_mora("、", None, None, "pau", 2 * unit_sec, 0.0),
        _gen_mora("ヒ", "h", 2 * unit_sec, "i", 4 * unit_sec, 125.0),
        _gen_mora("ホ", "h", 4 * unit_sec, "O", 2 * unit_sec, 0.0),
    ]

    # Expects - twice as fast, so every length is half of the input
    expected_moras = [
        _gen_mora("コ", "k", 1 * unit_sec, "o", 2 * unit_sec, 50.0),
        _gen_mora("ン", None, None, "N", 2 * unit_sec, 50.0),
        _gen_mora("、", None, None, "pau", 1 * unit_sec, 0.0),
        _gen_mora("ヒ", "h", 1 * unit_sec, "i", 2 * unit_sec, 125.0),
        _gen_mora("ホ", "h", 2 * unit_sec, "O", 1 * unit_sec, 0.0),
    ]

    # Outputs
    scaled_moras = apply_speed(source_moras, double_speed_query)

    assert scaled_moras == expected_moras


def test_apply_pitch():
"""Test `apply_pitch`."""
# Inputs
Expand Down
28 changes: 24 additions & 4 deletions voicevox_engine/synthesis_engine/synthesis_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,27 @@ def pad_with_silence(moras: list[Mora], query: AudioQuery) -> list[Mora]:
return moras


def apply_speed(moras: list[Mora], query: AudioQuery) -> list[Mora]:
    """
    Apply the speech-rate scale (`speedScale`) to a mora sequence.

    The lengths of every vowel and (when present) consonant are divided by
    `query.speedScale`. The input moras are not modified; the scaling is
    applied to a deep copy, so calling this function repeatedly on the same
    input never compounds the scale.

    Parameters
    ----------
    moras : list[Mora]
        Mora sequence
    query : AudioQuery
        Audio synthesis query
    Returns
    -------
    moras : list[Mora]
        New mora sequence with the speech-rate scale applied
    """
    # Local import keeps this block self-contained regardless of the
    # module-level imports of the file.
    import copy

    # Work on copies: mutating the caller's moras would make callers such as
    # `calc_frame_per_phoneme` silently alter their arguments.
    scaled_moras = copy.deepcopy(moras)
    for mora in scaled_moras:
        mora.vowel_length /= query.speedScale
        # Moras without a consonant carry `None` as consonant length.
        if mora.consonant_length is not None:
            mora.consonant_length /= query.speedScale
    return scaled_moras


def calc_frame_per_phoneme(query: AudioQuery, moras: List[Mora]):
"""
音素あたりのフレーム長を算出
Expand All @@ -149,6 +170,9 @@ def calc_frame_per_phoneme(query: AudioQuery, moras: List[Mora]):
frame_per_phoneme : NDArray[]
音素あたりのフレーム長。端数丸め。
"""
# Apply: グローバル特徴量による補正(話速)
moras = apply_speed(moras, query)

# 音素あたりの継続長
sec_per_phoneme = numpy.array(
[
Expand All @@ -161,10 +185,6 @@ def calc_frame_per_phoneme(query: AudioQuery, moras: List[Mora]):
],
dtype=numpy.float32,
)

# 話速による継続長の補正
sec_per_phoneme /= query.speedScale

# 音素あたりのフレーム長。端数丸め。
framerate = 24000 / 256 # framerate 93.75 [frame/sec]
frame_per_phoneme = numpy.round(sec_per_phoneme * framerate).astype(numpy.int32)
Expand Down

0 comments on commit a8603d4

Please sign in to comment.