From 040f33e71397770af1d96e88cb2a4c3975555d6a Mon Sep 17 00:00:00 2001 From: tarepan Date: Tue, 5 Dec 2023 03:53:36 +0000 Subject: [PATCH] =?UTF-8?q?Remove:=20`OjtPhoneme`=20`start`=20/=20`end`=20?= =?UTF-8?q?=E5=BB=83=E6=AD=A2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- test/test_acoustic_feature_extractor.py | 30 +------ test/test_synthesis_engine.py | 85 +++++++++---------- voicevox_engine/acoustic_feature_extractor.py | 13 +-- .../synthesis_engine/synthesis_engine.py | 5 +- 4 files changed, 45 insertions(+), 88 deletions(-) diff --git a/test/test_acoustic_feature_extractor.py b/test/test_acoustic_feature_extractor.py index 0a1a16ae3..94ef7ac63 100644 --- a/test/test_acoustic_feature_extractor.py +++ b/test/test_acoustic_feature_extractor.py @@ -3,28 +3,12 @@ from voicevox_engine.acoustic_feature_extractor import OjtPhoneme -def is_same_phoneme(p1: OjtPhoneme, p2: OjtPhoneme) -> bool: - """2つのOjtPhonemeが同じ`.phoneme`/`.start`/`.end`を持つ""" - return p1.phoneme == p2.phoneme and p1.start == p2.start and p1.end == p2.end - - class TestOjtPhoneme(TestCase): def setUp(self): super().setUp() # list_idx 0 1 2 3 4 5 6 7 8 9 10 1 2 3 4 5 6 7 8 9 hello_hiho = "sil k o N n i ch i w a pau h i h o d e s U sil".split() - self.ojt_hello_hiho = [ - OjtPhoneme(s, i, i + 1) for i, s in enumerate(hello_hiho) - ] - - def test_repr_(self): - self.assertEqual( - self.ojt_hello_hiho[1].__repr__(), "Phoneme(phoneme='k', start=1, end=2)" - ) - self.assertEqual( - self.ojt_hello_hiho[10].__repr__(), - "Phoneme(phoneme='pau', start=10, end=11)", - ) + self.ojt_hello_hiho = [OjtPhoneme(s) for s in hello_hiho] def test_phoneme_list(self): self.assertEqual(OjtPhoneme.phoneme_list[1], "A") @@ -39,19 +23,9 @@ def test_const(self): self.assertEqual(OjtPhoneme.space_phoneme, "pau") def test_convert(self): - sil_phoneme = OjtPhoneme("sil", 0, 0) + sil_phoneme = OjtPhoneme("sil") self.assertEqual(sil_phoneme.phoneme, "pau") - def test_equal(self): - # ojt_hello_hihoの10番目の"a"と比較 - true_ojt_phoneme = OjtPhoneme("a", 9, 10) - - false_ojt_phoneme_1 = OjtPhoneme("k", 9, 10) - false_ojt_phoneme_2 = OjtPhoneme("a", 10, 11) - self.assertTrue(is_same_phoneme(self.ojt_hello_hiho[9], true_ojt_phoneme)) - self.assertFalse(is_same_phoneme(self.ojt_hello_hiho[9], false_ojt_phoneme_1)) - self.assertFalse(is_same_phoneme(self.ojt_hello_hiho[9], false_ojt_phoneme_2)) - def test_phoneme_id(self): ojt_str_hello_hiho = " ".join([str(p.phoneme_id) for p in self.ojt_hello_hiho]) self.assertEqual( diff --git a/test/test_synthesis_engine.py b/test/test_synthesis_engine.py index 4a95abf61..f9bfa2078 100644 --- a/test/test_synthesis_engine.py +++ b/test/test_synthesis_engine.py @@ -24,15 +24,18 @@ unvoiced_mora_phoneme_list, ) -from .test_acoustic_feature_extractor import is_same_phoneme - TRUE_NUM_PHONEME = 45 +def is_same_phoneme(p1: OjtPhoneme, p2: OjtPhoneme) -> bool: + """2つのOjtPhonemeが同じ `.phoneme` を持つ""" + return p1.phoneme == p2.phoneme + + def is_same_ojt_phoneme_list( p1s: list[OjtPhoneme | None], p2s: list[OjtPhoneme | None] ) -> bool: - """2つのOjtPhonemeリストで全要素ペアが同じ`.phoneme`/`.start`/`.end`を持つ""" + """2つのOjtPhonemeリストで全要素ペアが同じ `.phoneme` を持つ""" if len(p1s) != len(p2s): return False @@ -230,7 +233,7 @@ def test_calc_frame_pitch(): _gen_mora(" ", None, None, " ", 0.0, 0.0), ] phoneme_str = "pau k o N pau h i h O pau" - phonemes = [OjtPhoneme(p, 0, 0) for p in phoneme_str.split()] + phonemes = [OjtPhoneme(p) for p in phoneme_str.split()] # Pre k o N pau h i h O Pst frame_per_phoneme = [1, 1, 2, 2, 1, 1, 2, 2, 1, 3] frame_per_phoneme = numpy.array(frame_per_phoneme, dtype=numpy.int32) @@ -254,7 +257,7 @@ def test_calc_frame_phoneme(): """Test `calc_frame_phoneme`.""" # Inputs phoneme_str = "pau k o N pau h i h O pau" - phonemes = [OjtPhoneme(p, 0, 0) for p in phoneme_str.split()] + phonemes = [OjtPhoneme(p) for p in phoneme_str.split()] # Pre k o N pau h i h O Pst frame_per_phoneme = [1, 1, 2, 2, 1, 1, 2, 2, 1, 3] n_frame = sum(frame_per_phoneme) @@ -291,7 +294,7 @@ def test_feat_to_framescale(): _gen_mora("ホ", "h", 4 * 0.01067, "O", 2 * 0.01067, 0.0), ] phoneme_str = "pau k o N pau h i h O pau" - phoneme_data_list = [OjtPhoneme(p, 0, 0) for p in phoneme_str.split()] + phoneme_data_list = [OjtPhoneme(p) for p in phoneme_str.split()] # Expects # frame_per_phoneme @@ -338,10 +341,8 @@ def setUp(self): "sil k o N n i ch i w a pau h i h o d e s U sil".split() ) self.phoneme_data_list_hello_hiho = [ - OjtPhoneme(phoneme=p, start=i, end=i + 1) - for i, p in enumerate( - "pau k o N n i ch i w a pau h i h o d e s U pau".split() - ) + OjtPhoneme(p) + for p in "pau k o N n i ch i w a pau h i h o d e s U pau".split() ] self.accent_phrases_hello_hiho = [ AccentPhrase( @@ -464,18 +465,18 @@ def test_split_mora(self): is_same_ojt_phoneme_list( vowel_phoneme_list, [ - OjtPhoneme(phoneme="pau", start=0, end=1), - OjtPhoneme(phoneme="o", start=2, end=3), - OjtPhoneme(phoneme="N", start=3, end=4), - OjtPhoneme(phoneme="i", start=5, end=6), - OjtPhoneme(phoneme="i", start=7, end=8), - OjtPhoneme(phoneme="a", start=9, end=10), - OjtPhoneme(phoneme="pau", start=10, end=11), - OjtPhoneme(phoneme="i", start=12, end=13), - OjtPhoneme(phoneme="o", start=14, end=15), - OjtPhoneme(phoneme="e", start=16, end=17), - OjtPhoneme(phoneme="U", start=18, end=19), - OjtPhoneme(phoneme="pau", start=19, end=20), + OjtPhoneme("pau"), + OjtPhoneme("o"), + OjtPhoneme("N"), + OjtPhoneme("i"), + OjtPhoneme("i"), + OjtPhoneme("a"), + OjtPhoneme("pau"), + OjtPhoneme("i"), + OjtPhoneme("o"), + OjtPhoneme("e"), + OjtPhoneme("U"), + OjtPhoneme("pau"), ], ) ) @@ -484,16 +485,16 @@ def test_split_mora(self): consonant_phoneme_list, [ None, - OjtPhoneme(phoneme="k", start=1, end=2), + OjtPhoneme("k"), None, - OjtPhoneme(phoneme="n", start=4, end=5), - OjtPhoneme(phoneme="ch", start=6, end=7), - OjtPhoneme(phoneme="w", start=8, end=9), + OjtPhoneme("n"), + OjtPhoneme("ch"), + OjtPhoneme("w"), None, - OjtPhoneme(phoneme="h", start=11, end=12), - OjtPhoneme(phoneme="h", start=13, end=14), - OjtPhoneme(phoneme="d", start=15, end=16), - OjtPhoneme(phoneme="s", start=17, end=18), + OjtPhoneme("h"), + OjtPhoneme("h"), + OjtPhoneme("d"), + OjtPhoneme("s"), None, ], ) @@ -507,7 +508,7 @@ def test_pre_process(self): mora_index = 0 phoneme_index = 1 - self.assertTrue(is_same_phoneme(phoneme_data_list[0], OjtPhoneme("pau", 0, 1))) + self.assertTrue(is_same_phoneme(phoneme_data_list[0], OjtPhoneme("pau"))) for accent_phrase in self.accent_phrases_hello_hiho: moras = accent_phrase.moras for mora in moras: @@ -517,16 +518,14 @@ def test_pre_process(self): self.assertTrue( is_same_phoneme( phoneme_data_list[phoneme_index], - OjtPhoneme( - mora.consonant, phoneme_index, phoneme_index + 1 - ), + OjtPhoneme(mora.consonant), ) ) phoneme_index += 1 self.assertTrue( is_same_phoneme( phoneme_data_list[phoneme_index], - OjtPhoneme(mora.vowel, phoneme_index, phoneme_index + 1), + OjtPhoneme(mora.vowel), ) ) phoneme_index += 1 @@ -536,14 +535,14 @@ def test_pre_process(self): self.assertTrue( is_same_phoneme( phoneme_data_list[phoneme_index], - OjtPhoneme("pau", phoneme_index, phoneme_index + 1), + OjtPhoneme("pau"), ) ) phoneme_index += 1 self.assertTrue( is_same_phoneme( phoneme_data_list[phoneme_index], - OjtPhoneme("pau", phoneme_index, phoneme_index + 1), + OjtPhoneme("pau"), ) ) @@ -699,7 +698,7 @@ def test_replace_mora_pitch(self): def result_value(i: int): # unvoiced_mora_phoneme_listのPhoneme ID版 unvoiced_mora_phoneme_id_list = [ - OjtPhoneme(p, 0, 0).phoneme_id for p in unvoiced_mora_phoneme_list + OjtPhoneme(p).phoneme_id for p in unvoiced_mora_phoneme_list ] if vowel_phoneme_list[i] in unvoiced_mora_phoneme_id_list: return 0 @@ -736,17 +735,17 @@ def synthesis_test_base(self, audio_query: AudioQuery): if mora.consonant is not None: mora.consonant_length = 0.1 phoneme_length_list.append(0.1) - phoneme_id_list.append(OjtPhoneme(mora.consonant, 0, 0).phoneme_id) + phoneme_id_list.append(OjtPhoneme(mora.consonant).phoneme_id) mora.vowel_length = 0.2 phoneme_length_list.append(0.2) - phoneme_id_list.append(OjtPhoneme(mora.vowel, 0, 0).phoneme_id) + phoneme_id_list.append(OjtPhoneme(mora.vowel).phoneme_id) if mora.vowel not in unvoiced_mora_phoneme_list: mora.pitch = 5.0 + random() f0_list.append(mora.pitch) if accent_phrase.pause_mora is not None: accent_phrase.pause_mora.vowel_length = 0.2 phoneme_length_list.append(0.2) - phoneme_id_list.append(OjtPhoneme("pau", 0, 0).phoneme_id) + phoneme_id_list.append(OjtPhoneme("pau").phoneme_id) f0_list.append(0.0) phoneme_length_list.append(0.0) phoneme_id_list.append(0) @@ -770,9 +769,7 @@ def synthesis_test_base(self, audio_query: AudioQuery): num_phoneme = OjtPhoneme.num_phoneme # mora_phoneme_listのPhoneme ID版 - mora_phoneme_id_list = [ - OjtPhoneme(p, 0, 0).phoneme_id for p in mora_phoneme_list - ] + mora_phoneme_id_list = [OjtPhoneme(p).phoneme_id for p in mora_phoneme_list] # numpy.repeatをfor文でやる f0 = [] diff --git a/voicevox_engine/acoustic_feature_extractor.py b/voicevox_engine/acoustic_feature_extractor.py index f579e6b41..516727056 100644 --- a/voicevox_engine/acoustic_feature_extractor.py +++ b/voicevox_engine/acoustic_feature_extractor.py @@ -65,22 +65,11 @@ class OjtPhoneme: num_phoneme = len(phoneme_list) space_phoneme = "pau" - def __init__( - self, - phoneme: str, - start: float, - end: float, - ): + def __init__(self, phoneme: str): # `sil`-to-`pau` (silent to space_phoneme) conversion if "sil" in phoneme: phoneme = self.space_phoneme - self.phoneme = phoneme - self.start = numpy.round(start, decimals=2) - self.end = numpy.round(end, decimals=2) - - def __repr__(self): - return f"Phoneme(phoneme='{self.phoneme}', start={self.start}, end={self.end})" def __eq__(self, o: object): """Deprecated.""" diff --git a/voicevox_engine/synthesis_engine/synthesis_engine.py b/voicevox_engine/synthesis_engine/synthesis_engine.py index 33dcaa967..9bd7dde56 100644 --- a/voicevox_engine/synthesis_engine/synthesis_engine.py +++ b/voicevox_engine/synthesis_engine/synthesis_engine.py @@ -102,10 +102,7 @@ def pre_process( phoneme_str_list = list(chain.from_iterable(phoneme_each_mora)) phoneme_str_list = ["pau"] + phoneme_str_list + ["pau"] - phoneme_data_list = [ - OjtPhoneme(phoneme=p, start=i, end=i + 1) - for i, p in enumerate(phoneme_str_list) - ] + phoneme_data_list = list(map(OjtPhoneme, phoneme_str_list)) return flatten_moras, phoneme_data_list