Skip to content

Commit

Permalink
Remove: OjtPhoneme start / end 廃止
Browse files Browse the repository at this point in the history
  • Loading branch information
tarepan committed Dec 6, 2023
1 parent 26e14f8 commit 040f33e
Show file tree
Hide file tree
Showing 4 changed files with 45 additions and 88 deletions.
30 changes: 2 additions & 28 deletions test/test_acoustic_feature_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,28 +3,12 @@
from voicevox_engine.acoustic_feature_extractor import OjtPhoneme


def is_same_phoneme(p1: OjtPhoneme, p2: OjtPhoneme) -> bool:
"""2つのOjtPhonemeが同じ`.phoneme`/`.start`/`.end`を持つ"""
return p1.phoneme == p2.phoneme and p1.start == p2.start and p1.end == p2.end


class TestOjtPhoneme(TestCase):
def setUp(self):
super().setUp()
# list_idx 0 1 2 3 4 5 6 7 8 9 10 1 2 3 4 5 6 7 8 9
hello_hiho = "sil k o N n i ch i w a pau h i h o d e s U sil".split()
self.ojt_hello_hiho = [
OjtPhoneme(s, i, i + 1) for i, s in enumerate(hello_hiho)
]

def test_repr_(self):
self.assertEqual(
self.ojt_hello_hiho[1].__repr__(), "Phoneme(phoneme='k', start=1, end=2)"
)
self.assertEqual(
self.ojt_hello_hiho[10].__repr__(),
"Phoneme(phoneme='pau', start=10, end=11)",
)
self.ojt_hello_hiho = [OjtPhoneme(s) for s in hello_hiho]

def test_phoneme_list(self):
self.assertEqual(OjtPhoneme.phoneme_list[1], "A")
Expand All @@ -39,19 +23,9 @@ def test_const(self):
self.assertEqual(OjtPhoneme.space_phoneme, "pau")

def test_convert(self):
sil_phoneme = OjtPhoneme("sil", 0, 0)
sil_phoneme = OjtPhoneme("sil")
self.assertEqual(sil_phoneme.phoneme, "pau")

def test_equal(self):
# ojt_hello_hihoの10番目の"a"と比較
true_ojt_phoneme = OjtPhoneme("a", 9, 10)

false_ojt_phoneme_1 = OjtPhoneme("k", 9, 10)
false_ojt_phoneme_2 = OjtPhoneme("a", 10, 11)
self.assertTrue(is_same_phoneme(self.ojt_hello_hiho[9], true_ojt_phoneme))
self.assertFalse(is_same_phoneme(self.ojt_hello_hiho[9], false_ojt_phoneme_1))
self.assertFalse(is_same_phoneme(self.ojt_hello_hiho[9], false_ojt_phoneme_2))

def test_phoneme_id(self):
ojt_str_hello_hiho = " ".join([str(p.phoneme_id) for p in self.ojt_hello_hiho])
self.assertEqual(
Expand Down
85 changes: 41 additions & 44 deletions test/test_synthesis_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,18 @@
unvoiced_mora_phoneme_list,
)

from .test_acoustic_feature_extractor import is_same_phoneme

TRUE_NUM_PHONEME = 45


def is_same_phoneme(p1: OjtPhoneme, p2: OjtPhoneme) -> bool:
    """Return whether two OjtPhoneme objects hold the same `.phoneme`.

    Only the `.phoneme` attribute of each argument is compared.
    """
    first_symbol, second_symbol = p1.phoneme, p2.phoneme
    return first_symbol == second_symbol


def is_same_ojt_phoneme_list(
p1s: list[OjtPhoneme | None], p2s: list[OjtPhoneme | None]
) -> bool:
"""2つのOjtPhonemeリストで全要素ペアが同じ`.phoneme`/`.start`/`.end`を持つ"""
"""2つのOjtPhonemeリストで全要素ペアが同じ `.phoneme` を持つ"""
if len(p1s) != len(p2s):
return False

Expand Down Expand Up @@ -230,7 +233,7 @@ def test_calc_frame_pitch():
_gen_mora(" ", None, None, " ", 0.0, 0.0),
]
phoneme_str = "pau k o N pau h i h O pau"
phonemes = [OjtPhoneme(p, 0, 0) for p in phoneme_str.split()]
phonemes = [OjtPhoneme(p) for p in phoneme_str.split()]
# Pre k o N pau h i h O Pst
frame_per_phoneme = [1, 1, 2, 2, 1, 1, 2, 2, 1, 3]
frame_per_phoneme = numpy.array(frame_per_phoneme, dtype=numpy.int32)
Expand All @@ -254,7 +257,7 @@ def test_calc_frame_phoneme():
"""Test `calc_frame_phoneme`."""
# Inputs
phoneme_str = "pau k o N pau h i h O pau"
phonemes = [OjtPhoneme(p, 0, 0) for p in phoneme_str.split()]
phonemes = [OjtPhoneme(p) for p in phoneme_str.split()]
# Pre k o N pau h i h O Pst
frame_per_phoneme = [1, 1, 2, 2, 1, 1, 2, 2, 1, 3]
n_frame = sum(frame_per_phoneme)
Expand Down Expand Up @@ -291,7 +294,7 @@ def test_feat_to_framescale():
_gen_mora("ホ", "h", 4 * 0.01067, "O", 2 * 0.01067, 0.0),
]
phoneme_str = "pau k o N pau h i h O pau"
phoneme_data_list = [OjtPhoneme(p, 0, 0) for p in phoneme_str.split()]
phoneme_data_list = [OjtPhoneme(p) for p in phoneme_str.split()]

# Expects
# frame_per_phoneme
Expand Down Expand Up @@ -338,10 +341,8 @@ def setUp(self):
"sil k o N n i ch i w a pau h i h o d e s U sil".split()
)
self.phoneme_data_list_hello_hiho = [
OjtPhoneme(phoneme=p, start=i, end=i + 1)
for i, p in enumerate(
"pau k o N n i ch i w a pau h i h o d e s U pau".split()
)
OjtPhoneme(p)
for p in "pau k o N n i ch i w a pau h i h o d e s U pau".split()
]
self.accent_phrases_hello_hiho = [
AccentPhrase(
Expand Down Expand Up @@ -464,18 +465,18 @@ def test_split_mora(self):
is_same_ojt_phoneme_list(
vowel_phoneme_list,
[
OjtPhoneme(phoneme="pau", start=0, end=1),
OjtPhoneme(phoneme="o", start=2, end=3),
OjtPhoneme(phoneme="N", start=3, end=4),
OjtPhoneme(phoneme="i", start=5, end=6),
OjtPhoneme(phoneme="i", start=7, end=8),
OjtPhoneme(phoneme="a", start=9, end=10),
OjtPhoneme(phoneme="pau", start=10, end=11),
OjtPhoneme(phoneme="i", start=12, end=13),
OjtPhoneme(phoneme="o", start=14, end=15),
OjtPhoneme(phoneme="e", start=16, end=17),
OjtPhoneme(phoneme="U", start=18, end=19),
OjtPhoneme(phoneme="pau", start=19, end=20),
OjtPhoneme("pau"),
OjtPhoneme("o"),
OjtPhoneme("N"),
OjtPhoneme("i"),
OjtPhoneme("i"),
OjtPhoneme("a"),
OjtPhoneme("pau"),
OjtPhoneme("i"),
OjtPhoneme("o"),
OjtPhoneme("e"),
OjtPhoneme("U"),
OjtPhoneme("pau"),
],
)
)
Expand All @@ -484,16 +485,16 @@ def test_split_mora(self):
consonant_phoneme_list,
[
None,
OjtPhoneme(phoneme="k", start=1, end=2),
OjtPhoneme("k"),
None,
OjtPhoneme(phoneme="n", start=4, end=5),
OjtPhoneme(phoneme="ch", start=6, end=7),
OjtPhoneme(phoneme="w", start=8, end=9),
OjtPhoneme("n"),
OjtPhoneme("ch"),
OjtPhoneme("w"),
None,
OjtPhoneme(phoneme="h", start=11, end=12),
OjtPhoneme(phoneme="h", start=13, end=14),
OjtPhoneme(phoneme="d", start=15, end=16),
OjtPhoneme(phoneme="s", start=17, end=18),
OjtPhoneme("h"),
OjtPhoneme("h"),
OjtPhoneme("d"),
OjtPhoneme("s"),
None,
],
)
Expand All @@ -507,7 +508,7 @@ def test_pre_process(self):
mora_index = 0
phoneme_index = 1

self.assertTrue(is_same_phoneme(phoneme_data_list[0], OjtPhoneme("pau", 0, 1)))
self.assertTrue(is_same_phoneme(phoneme_data_list[0], OjtPhoneme("pau")))
for accent_phrase in self.accent_phrases_hello_hiho:
moras = accent_phrase.moras
for mora in moras:
Expand All @@ -517,16 +518,14 @@ def test_pre_process(self):
self.assertTrue(
is_same_phoneme(
phoneme_data_list[phoneme_index],
OjtPhoneme(
mora.consonant, phoneme_index, phoneme_index + 1
),
OjtPhoneme(mora.consonant),
)
)
phoneme_index += 1
self.assertTrue(
is_same_phoneme(
phoneme_data_list[phoneme_index],
OjtPhoneme(mora.vowel, phoneme_index, phoneme_index + 1),
OjtPhoneme(mora.vowel),
)
)
phoneme_index += 1
Expand All @@ -536,14 +535,14 @@ def test_pre_process(self):
self.assertTrue(
is_same_phoneme(
phoneme_data_list[phoneme_index],
OjtPhoneme("pau", phoneme_index, phoneme_index + 1),
OjtPhoneme("pau"),
)
)
phoneme_index += 1
self.assertTrue(
is_same_phoneme(
phoneme_data_list[phoneme_index],
OjtPhoneme("pau", phoneme_index, phoneme_index + 1),
OjtPhoneme("pau"),
)
)

Expand Down Expand Up @@ -699,7 +698,7 @@ def test_replace_mora_pitch(self):
def result_value(i: int):
# unvoiced_mora_phoneme_listのPhoneme ID版
unvoiced_mora_phoneme_id_list = [
OjtPhoneme(p, 0, 0).phoneme_id for p in unvoiced_mora_phoneme_list
OjtPhoneme(p).phoneme_id for p in unvoiced_mora_phoneme_list
]
if vowel_phoneme_list[i] in unvoiced_mora_phoneme_id_list:
return 0
Expand Down Expand Up @@ -736,17 +735,17 @@ def synthesis_test_base(self, audio_query: AudioQuery):
if mora.consonant is not None:
mora.consonant_length = 0.1
phoneme_length_list.append(0.1)
phoneme_id_list.append(OjtPhoneme(mora.consonant, 0, 0).phoneme_id)
phoneme_id_list.append(OjtPhoneme(mora.consonant).phoneme_id)
mora.vowel_length = 0.2
phoneme_length_list.append(0.2)
phoneme_id_list.append(OjtPhoneme(mora.vowel, 0, 0).phoneme_id)
phoneme_id_list.append(OjtPhoneme(mora.vowel).phoneme_id)
if mora.vowel not in unvoiced_mora_phoneme_list:
mora.pitch = 5.0 + random()
f0_list.append(mora.pitch)
if accent_phrase.pause_mora is not None:
accent_phrase.pause_mora.vowel_length = 0.2
phoneme_length_list.append(0.2)
phoneme_id_list.append(OjtPhoneme("pau", 0, 0).phoneme_id)
phoneme_id_list.append(OjtPhoneme("pau").phoneme_id)
f0_list.append(0.0)
phoneme_length_list.append(0.0)
phoneme_id_list.append(0)
Expand All @@ -770,9 +769,7 @@ def synthesis_test_base(self, audio_query: AudioQuery):

num_phoneme = OjtPhoneme.num_phoneme
# mora_phoneme_listのPhoneme ID版
mora_phoneme_id_list = [
OjtPhoneme(p, 0, 0).phoneme_id for p in mora_phoneme_list
]
mora_phoneme_id_list = [OjtPhoneme(p).phoneme_id for p in mora_phoneme_list]

# numpy.repeatをfor文でやる
f0 = []
Expand Down
13 changes: 1 addition & 12 deletions voicevox_engine/acoustic_feature_extractor.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,22 +65,11 @@ class OjtPhoneme:
num_phoneme = len(phoneme_list)
space_phoneme = "pau"

def __init__(
self,
phoneme: str,
start: float,
end: float,
):
def __init__(self, phoneme: str):
# `sil`-to-`pau` (silent to space_phoneme) conversion
if "sil" in phoneme:
phoneme = self.space_phoneme

self.phoneme = phoneme
self.start = numpy.round(start, decimals=2)
self.end = numpy.round(end, decimals=2)

def __repr__(self):
return f"Phoneme(phoneme='{self.phoneme}', start={self.start}, end={self.end})"

def __eq__(self, o: object):
"""Deprecated."""
Expand Down
5 changes: 1 addition & 4 deletions voicevox_engine/synthesis_engine/synthesis_engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,10 +102,7 @@ def pre_process(
phoneme_str_list = list(chain.from_iterable(phoneme_each_mora))
phoneme_str_list = ["pau"] + phoneme_str_list + ["pau"]

phoneme_data_list = [
OjtPhoneme(phoneme=p, start=i, end=i + 1)
for i, p in enumerate(phoneme_str_list)
]
phoneme_data_list = list(map(OjtPhoneme, phoneme_str_list))

return flatten_moras, phoneme_data_list

Expand Down

0 comments on commit 040f33e

Please sign in to comment.