diff --git a/run.py b/run.py index 71f653760..06cd03660 100644 --- a/run.py +++ b/run.py @@ -196,9 +196,7 @@ def accent_phrases( """ if is_kana: try: - accent_phrases, interrogative_accent_phrase_marks = parse_kana( - text, enable_interrogative - ) + accent_phrases = parse_kana(text, enable_interrogative) except ParseKanaError as err: raise HTTPException( status_code=400, @@ -213,8 +211,6 @@ def accent_phrases( for accent_phrase in ( adjust_interrogative_accent_phrases( accent_phrases, - interrogative_accent_phrase_marks, - enable_interrogative, ) ) ] diff --git a/test/test_fastapi_model.py b/test/test_fastapi_model.py index bbd05a3d5..288f1cf27 100644 --- a/test/test_fastapi_model.py +++ b/test/test_fastapi_model.py @@ -72,6 +72,7 @@ def _accent_phrase(self): moras=self._moras(), accent=3, pause_mora=None, + is_interrogative=False, ) def _fastapi_accent_phrase(self): diff --git a/test/test_kana_parser.py b/test/test_kana_parser.py index f777217d8..31fcda284 100644 --- a/test/test_kana_parser.py +++ b/test/test_kana_parser.py @@ -7,7 +7,7 @@ def parse_kana(text: str) -> List[AccentPhrase]: - accent_phrases, _ = kana_parser.parse_kana(text, False) + accent_phrases = kana_parser.parse_kana(text, False) return accent_phrases @@ -61,17 +61,9 @@ def _interrogative_accent_phrase_marks_base( text: str, enable_interrogative: bool, expected_accent_phrases: List[AccentPhrase], - expected_interrogative_accent_phrase_marks: List[bool], ): - accent_phrases, interrogative_accent_phrase_marks = kana_parser.parse_kana( - text, enable_interrogative - ) - self.assertEqual(len(accent_phrases), len(interrogative_accent_phrase_marks)) + accent_phrases = kana_parser.parse_kana(text, enable_interrogative) self.assertEqual(expected_accent_phrases, accent_phrases) - self.assertEqual( - interrogative_accent_phrase_marks, - expected_interrogative_accent_phrase_marks, - ) def test_interrogative_accent_phrase_marks(self): def a_slash_a_accent_phrases(): @@ -89,6 +81,7 @@ def a_slash_a_accent_phrases(): ], accent=1, pause_mora=None, + is_interrogative=False, ), AccentPhrase( moras=[ @@ -103,6 +96,7 @@ def a_slash_a_accent_phrases(): ], accent=1, pause_mora=None, + is_interrogative=False, ), ] @@ -111,7 +105,6 @@ def a_slash_a_accent_phrases(): text="ア'/ア'", enable_interrogative=False, expected_accent_phrases=expected_accent_phrases, - expected_interrogative_accent_phrase_marks=[False, False], ) expected_accent_phrases = a_slash_a_accent_phrases() @@ -119,7 +112,6 @@ def a_slash_a_accent_phrases(): text="ア'/ア'", enable_interrogative=True, expected_accent_phrases=expected_accent_phrases, - expected_interrogative_accent_phrase_marks=[False, False], ) def a_jp_comma_a_accent_phrases(): @@ -144,6 +136,7 @@ def a_jp_comma_a_accent_phrases(): vowel_length=0.0, pitch=0.0, ), + is_interrogative=False, ), AccentPhrase( moras=[ @@ -158,6 +151,7 @@ def a_jp_comma_a_accent_phrases(): ], accent=1, pause_mora=None, + is_interrogative=False, ), ] @@ -166,7 +160,6 @@ def a_jp_comma_a_accent_phrases(): text="ア'、ア'", enable_interrogative=False, expected_accent_phrases=expected_accent_phrases, - expected_interrogative_accent_phrase_marks=[False, False], ) expected_accent_phrases = a_jp_comma_a_accent_phrases() @@ -174,7 +167,6 @@ def a_jp_comma_a_accent_phrases(): text="ア'、ア'", enable_interrogative=True, expected_accent_phrases=expected_accent_phrases, - expected_interrogative_accent_phrase_marks=[False, False], ) def a_slash_a_slash_a_slash_a_slash_a_accent_phrases(): @@ -192,6 +184,7 @@ def a_slash_a_slash_a_slash_a_slash_a_accent_phrases(): ], accent=1, pause_mora=None, + is_interrogative=False, ), AccentPhrase( moras=[ @@ -206,6 +199,7 @@ def a_slash_a_slash_a_slash_a_slash_a_accent_phrases(): ], accent=1, pause_mora=None, + is_interrogative=False, ), AccentPhrase( moras=[ @@ -220,6 +214,7 @@ def a_slash_a_slash_a_slash_a_slash_a_accent_phrases(): ], accent=1, pause_mora=None, + is_interrogative=False, ), AccentPhrase( moras=[ @@ -234,6 +229,7 @@ def a_slash_a_slash_a_slash_a_slash_a_accent_phrases(): ], accent=1, pause_mora=None, + is_interrogative=False, ), AccentPhrase( moras=[ @@ -248,6 +244,7 @@ def a_slash_a_slash_a_slash_a_slash_a_accent_phrases(): ], accent=1, pause_mora=None, + is_interrogative=False, ), ] @@ -256,26 +253,12 @@ def a_slash_a_slash_a_slash_a_slash_a_accent_phrases(): text="ア'/ア'/ア'/ア'/ア'", enable_interrogative=False, expected_accent_phrases=expected_accent_phrases, - expected_interrogative_accent_phrase_marks=[ - False, - False, - False, - False, - False, - ], ) expected_accent_phrases = a_slash_a_slash_a_slash_a_slash_a_accent_phrases() self._interrogative_accent_phrase_marks_base( text="ア'/ア'/ア'/ア'/ア'", enable_interrogative=True, expected_accent_phrases=expected_accent_phrases, - expected_interrogative_accent_phrase_marks=[ - False, - False, - False, - False, - False, - ], ) def su_accent_phrases(): @@ -293,6 +276,7 @@ def su_accent_phrases(): ], accent=1, pause_mora=None, + is_interrogative=False, ), ] @@ -301,14 +285,12 @@ def su_accent_phrases(): text="ス'", enable_interrogative=False, expected_accent_phrases=expected_accent_phrases, - expected_interrogative_accent_phrase_marks=[False], ) expected_accent_phrases = su_accent_phrases() self._interrogative_accent_phrase_marks_base( text="ス'", enable_interrogative=True, expected_accent_phrases=expected_accent_phrases, - expected_interrogative_accent_phrase_marks=[False], ) def under_score_su_accent_phrases(): @@ -326,6 +308,7 @@ def under_score_su_accent_phrases(): ], accent=1, pause_mora=None, + is_interrogative=False, ), ] @@ -334,7 +317,6 @@ def under_score_su_accent_phrases(): text="_ス'", enable_interrogative=False, expected_accent_phrases=expected_accent_phrases, - expected_interrogative_accent_phrase_marks=[False], ) expected_accent_phrases = under_score_su_accent_phrases() @@ -342,7 +324,6 @@ def under_score_su_accent_phrases(): text="_ス'", enable_interrogative=True, expected_accent_phrases=expected_accent_phrases, - expected_interrogative_accent_phrase_marks=[False], ) def gye_accent_phrases(): @@ -360,6 +341,7 @@ def gye_accent_phrases(): ], accent=1, pause_mora=None, + is_interrogative=False, ), ] @@ -368,7 +350,6 @@ def gye_accent_phrases(): text="ギェ'", enable_interrogative=False, expected_accent_phrases=expected_accent_phrases, - expected_interrogative_accent_phrase_marks=[False], ) expected_accent_phrases = gye_accent_phrases() @@ -376,7 +357,6 @@ def gye_accent_phrases(): text="ギェ'", enable_interrogative=True, expected_accent_phrases=expected_accent_phrases, - expected_interrogative_accent_phrase_marks=[False], ) def gye_gye_gye_accent_phrases(): @@ -401,6 +381,7 @@ def gye_gye_gye_accent_phrases(): vowel_length=0.0, pitch=0.0, ), + is_interrogative=False, ), AccentPhrase( moras=[ @@ -415,6 +396,7 @@ def gye_gye_gye_accent_phrases(): ], accent=1, pause_mora=None, + is_interrogative=False, ), AccentPhrase( moras=[ @@ -429,6 +411,7 @@ def gye_gye_gye_accent_phrases(): ], accent=1, pause_mora=None, + is_interrogative=False, ), ] @@ -437,7 +420,6 @@ def gye_gye_gye_accent_phrases(): text="ギェ'、ギェ'/ギェ'", enable_interrogative=False, expected_accent_phrases=expected_accent_phrases, - expected_interrogative_accent_phrase_marks=[False, False, False], ) expected_accent_phrases = gye_gye_gye_accent_phrases() @@ -445,7 +427,6 @@ def gye_gye_gye_accent_phrases(): text="ギェ'、ギェ'/ギェ'", enable_interrogative=True, expected_accent_phrases=expected_accent_phrases, - expected_interrogative_accent_phrase_marks=[False, False, False], ) def a_question_mark_accent_phrases(): @@ -463,6 +444,7 @@ def a_question_mark_accent_phrases(): ], accent=1, pause_mora=None, + is_interrogative=False, ), ] @@ -471,11 +453,11 @@ def a_question_mark_accent_phrases(): text="ア'?", enable_interrogative=False, expected_accent_phrases=expected_accent_phrases, - expected_interrogative_accent_phrase_marks=[False], ) expected_accent_phrases = a_question_mark_accent_phrases() - expected_accent_phrases[0].moras.append( + expected_accent_phrases[-1].is_interrogative = True + expected_accent_phrases[-1].moras.append( Mora( text="ア", consonant=None, @@ -489,7 +471,6 @@ def a_question_mark_accent_phrases(): text="ア'?", enable_interrogative=True, expected_accent_phrases=expected_accent_phrases, - expected_interrogative_accent_phrase_marks=[True], ) def gye_gye_gye_question_mark_accent_phrases(): @@ -514,6 +495,7 @@ def gye_gye_gye_question_mark_accent_phrases(): vowel_length=0.0, pitch=0.0, ), + is_interrogative=False, ), AccentPhrase( moras=[ @@ -528,6 +510,7 @@ def gye_gye_gye_question_mark_accent_phrases(): ], accent=1, pause_mora=None, + is_interrogative=False, ), AccentPhrase( moras=[ @@ -542,6 +525,7 @@ def gye_gye_gye_question_mark_accent_phrases(): ], accent=1, pause_mora=None, + is_interrogative=False, ), ] @@ -550,10 +534,10 @@ def gye_gye_gye_question_mark_accent_phrases(): text="ギェ'、ギェ'/ギェ'?", enable_interrogative=False, expected_accent_phrases=expected_accent_phrases, - expected_interrogative_accent_phrase_marks=[False, False, False], ) expected_accent_phrases = gye_gye_gye_question_mark_accent_phrases() + expected_accent_phrases[-1].is_interrogative = True expected_accent_phrases[-1].moras.append( Mora( text="エ", @@ -568,7 +552,6 @@ def gye_gye_gye_question_mark_accent_phrases(): text="ギェ'、ギェ'/ギェ'?", enable_interrogative=True, expected_accent_phrases=expected_accent_phrases, - expected_interrogative_accent_phrase_marks=[False, False, True], ) @@ -601,3 +584,103 @@ def test_exceptions(self): with self.assertRaises(ParseKanaError) as err: kana_parser.parse_kana("ア?ア'", True) self.assertEqual(err.exception.errcode, ParseKanaErrorCode.UNKNOWN_TEXT) + + +class TestCreateKana(TestCase): + def test_create_kana_interrogative(self): + def koreha_arimasuka_accent_phrases(): + return [ + AccentPhrase( + moras=[ + Mora( + text="コ", + consonant="k", + consonant_length=0, + vowel="o", + vowel_length=0, + pitch=0, + ), + Mora( + text="レ", + consonant="r", + consonant_length=0, + vowel="e", + vowel_length=0, + pitch=0, + ), + Mora( + text="ワ", + consonant="w", + consonant_length=0, + vowel="a", + vowel_length=0, + pitch=0, + ), + ], + accent=3, + pause_mora=None, + is_interrogative=False, + ), + AccentPhrase( + moras=[ + Mora( + text="ア", + consonant=None, + consonant_length=None, + vowel="a", + vowel_length=0, + pitch=0, + ), + Mora( + text="リ", + consonant="r", + consonant_length=0, + vowel="i", + vowel_length=0, + pitch=0, + ), + Mora( + text="マ", + consonant="m", + consonant_length=0, + vowel="a", + vowel_length=0, + pitch=0, + ), + Mora( + text="ス", + consonant="s", + consonant_length=0, + vowel="U", + vowel_length=0, + pitch=0, + ), + Mora( + text="カ", + consonant="k", + consonant_length=0, + vowel="a", + vowel_length=0, + pitch=0, + ), + Mora( + text="ア", + consonant=None, + consonant_length=None, + vowel="a", + vowel_length=0, + pitch=0, + ), + ], + accent=3, + pause_mora=None, + is_interrogative=False, + ), + ] + + accent_phrases = koreha_arimasuka_accent_phrases() + self.assertEqual(create_kana(accent_phrases), "コレワ'/アリマ'_スカア") + + accent_phrases = koreha_arimasuka_accent_phrases() + accent_phrases[-1].is_interrogative = True + self.assertEqual(create_kana(accent_phrases), "コレワ'/アリマ'_スカ?") diff --git a/test/test_mock_synthesis_engine.py b/test/test_mock_synthesis_engine.py index 27bf20bf5..a0460dc07 100644 --- a/test/test_mock_synthesis_engine.py +++ b/test/test_mock_synthesis_engine.py @@ -62,6 +62,7 @@ def setUp(self): vowel_length=0.0, pitch=0.0, ), + is_interrogative=False, ), AccentPhrase( moras=[ @@ -100,6 +101,7 @@ def setUp(self): ], accent=1, pause_mora=None, + is_interrogative=False, ), ] self.engine = MockSynthesisEngine(speakers="") diff --git a/test/test_synthesis_engine.py b/test/test_synthesis_engine.py index 28e3779d7..070912561 100644 --- a/test/test_synthesis_engine.py +++ b/test/test_synthesis_engine.py @@ -145,6 +145,7 @@ def setUp(self): vowel_length=0.0, pitch=0.0, ), + is_interrogative=False, ), AccentPhrase( moras=[ @@ -183,6 +184,7 @@ def setUp(self): ], accent=1, pause_mora=None, + is_interrogative=False, ), ] self.yukarin_s_mock = Mock(side_effect=yukarin_s_mock) diff --git a/test/test_synthesis_engine_base.py b/test/test_synthesis_engine_base.py index 8cc7d0068..6cb06b72a 100644 --- a/test/test_synthesis_engine_base.py +++ b/test/test_synthesis_engine_base.py @@ -58,6 +58,7 @@ def koreha_arimasuka_base_expected(): ], accent=3, pause_mora=None, + is_interrogative=False, ), AccentPhrase( moras=[ @@ -104,10 +105,12 @@ def koreha_arimasuka_base_expected(): ], accent=3, pause_mora=None, + is_interrogative=False, ), ] expected = koreha_arimasuka_base_expected() + expected[-1].is_interrogative = True expected[-1].moras += [ Mora( text="ア", @@ -153,6 +156,7 @@ def nn_base_expected(): ], accent=1, pause_mora=None, + is_interrogative=False, ) ] @@ -164,6 +168,7 @@ def nn_base_expected(): ) expected = nn_base_expected() + expected[-1].is_interrogative = True expected[-1].moras += [ Mora( text="ン", @@ -202,6 +207,7 @@ def ltu_base_expected(): ], accent=1, pause_mora=None, + is_interrogative=False, ) ] @@ -213,6 +219,7 @@ def ltu_base_expected(): ) expected = ltu_base_expected() + expected[-1].is_interrogative = True expected[-1].moras += [ Mora( text="ッ", @@ -251,6 +258,7 @@ def su_base_expected(): ], accent=1, pause_mora=None, + is_interrogative=False, ) ] @@ -262,6 +270,7 @@ def su_base_expected(): ) expected = su_base_expected() + expected[-1].is_interrogative = True expected[-1].moras += [ Mora( text="ウ", diff --git a/voicevox_engine/kana_parser.py b/voicevox_engine/kana_parser.py index 88e1b66ed..9f920b031 100644 --- a/voicevox_engine/kana_parser.py +++ b/voicevox_engine/kana_parser.py @@ -1,4 +1,4 @@ -from typing import List, Optional, Tuple +from typing import List, Optional from .model import AccentPhrase, Mora, ParseKanaError, ParseKanaErrorCode from .mora_list import openjtalk_mora2text, openjtalk_text2mora @@ -76,12 +76,12 @@ def _text_to_accent_phrase(phrase: str) -> List[AccentPhrase]: if accent_index is None: raise ParseKanaError(ParseKanaErrorCode.ACCENT_NOTFOUND, text=phrase) else: - return AccentPhrase(moras=moras, accent=accent_index, pause_mora=None) + return AccentPhrase( + moras=moras, accent=accent_index, pause_mora=None, is_interrogative=False + ) -def parse_kana( - text: str, enable_interrogative: bool -) -> Tuple[List[AccentPhrase], List[bool]]: +def parse_kana(text: str, enable_interrogative: bool) -> List[AccentPhrase]: """ AquesTalkライクな読み仮名をパースして音長・音高未指定のaccent phraseに変換 """ @@ -114,8 +114,6 @@ def parse_kana( ) parsed_results.append(accent_phrase) - interrogative_accent_phrase_marks = [False] * len(parsed_results) - if enable_interrogative and is_interrogative_text: last_parsed_result = parsed_results[-1] last_mora = last_parsed_result.moras[-1] @@ -129,18 +127,30 @@ def parse_kana( pitch=0, ) ) - interrogative_accent_phrase_marks[-1] = True + last_parsed_result.is_interrogative = True - return parsed_results, interrogative_accent_phrase_marks + return parsed_results def create_kana(accent_phrases: List[AccentPhrase]) -> str: text = "" + replace_vowel_to_interrogative = ( + len(accent_phrases) > 0 and accent_phrases[-1].is_interrogative + ) for i, phrase in enumerate(accent_phrases): for j, mora in enumerate(phrase.moras): if mora.vowel in ["A", "I", "U", "E", "O"]: text += UNVOICE_SYMBOL - text += mora.text + + # TODO: 疑問系が正式に対応したらここの処理をmora.textを追加した上で疑問符を追加する処理に変更する + if ( + replace_vowel_to_interrogative + and i == len(accent_phrases) - 1 + and j == len(phrase.moras) - 1 + ): + text += WIDE_INTERROGATION_MARK + else: + text += mora.text if j + 1 == phrase.accent: text += ACCENT_SYMBOL if i < len(accent_phrases) - 1: diff --git a/voicevox_engine/model.py b/voicevox_engine/model.py index 76e68e804..d3a7df550 100644 --- a/voicevox_engine/model.py +++ b/voicevox_engine/model.py @@ -40,6 +40,7 @@ class AccentPhrase(BaseModel): moras: List[Mora] = Field(title="モーラのリスト") accent: int = Field(title="アクセント箇所") pause_mora: Optional[Mora] = Field(title="後ろに無音を付けるかどうか") + is_interrogative: bool def __hash__(self): items = [ diff --git a/voicevox_engine/synthesis_engine/synthesis_engine_base.py b/voicevox_engine/synthesis_engine/synthesis_engine_base.py index 64ac71959..431a995e4 100644 --- a/voicevox_engine/synthesis_engine/synthesis_engine_base.py +++ b/voicevox_engine/synthesis_engine/synthesis_engine_base.py @@ -37,8 +37,6 @@ def add_interrogative_mora_if_last_phoneme_is_interrogative( def adjust_interrogative_accent_phrases( accent_phrases: List[AccentPhrase], - interrogative_accent_phrase_marks: List[bool], - enable_interrogative: bool, ) -> List[AccentPhrase]: """ enable_interrogativeが有効になっていて与えられたaccent_phrasesに疑問系のものがあった場合、 @@ -48,14 +46,13 @@ def adjust_interrogative_accent_phrases( return [ AccentPhrase( moras=adjust_interrogative_moras(accent_phrase.moras) - if enable_interrogative and interrogative_accent_phrase_mark + if accent_phrase.is_interrogative else accent_phrase.moras, accent=accent_phrase.accent, pause_mora=accent_phrase.pause_mora, + is_interrogative=accent_phrase.is_interrogative, ) - for accent_phrase, interrogative_accent_phrase_mark in zip( - accent_phrases, interrogative_accent_phrase_marks - ) + for accent_phrase in accent_phrases ] @@ -156,12 +153,6 @@ def create_accent_phrases( if len(utterance.breath_groups) == 0: return [] - interrogative_accent_phrase_marks = [ - accent_phrase.is_interrogative - for breath_group in utterance.breath_groups - for accent_phrase in breath_group.accent_phrases - ] - accent_phrases = self.replace_mora_data( accent_phrases=[ AccentPhrase( @@ -187,6 +178,8 @@ def create_accent_phrases( ) else None ), + is_interrogative=enable_interrogative + and accent_phrase.is_interrogative, ) for i_breath_group, breath_group in enumerate(utterance.breath_groups) for i_accent_phrase, accent_phrase in enumerate( @@ -195,9 +188,7 @@ def create_accent_phrases( ], speaker_id=speaker_id, ) - return adjust_interrogative_accent_phrases( - accent_phrases, interrogative_accent_phrase_marks, enable_interrogative - ) + return adjust_interrogative_accent_phrases(accent_phrases) @abstractmethod def synthesis(self, query: AudioQuery, speaker_id: int): diff --git a/voicevox_engine/webapi/fastapi_model.py b/voicevox_engine/webapi/fastapi_model.py index b934dfa59..3d349c16c 100644 --- a/voicevox_engine/webapi/fastapi_model.py +++ b/voicevox_engine/webapi/fastapi_model.py @@ -89,6 +89,7 @@ def to_engine(self) -> model.AccentPhrase: pause_mora=self.pause_mora.to_engine() if self.pause_mora is not None else None, + is_interrogative=False, )