VOICEVOX · Hiroshiba · May 3, 2024 · Apr 1, 2024 · Apr 2, 2024 · Apr 2, 2024
@@ -1,4 +1,4 @@
 # serializer version: 1
 # name: test_テキストと話者IDから音声を合成できる
-  'MD5:9cb1070db2510240ff63a16fd42907c9'
+  'MD5:8f7ddc461c68542d4d8ef4cd5c54ca82'
 # ---
@@ -0,0 +1,4 @@
+# serializer version: 1
+# name: test_post_frame_synthesis_200
+  'MD5:1c385210acba238994604a8cee96aee3'
+# ---
@@ -0,0 +1,4 @@
+# serializer version: 1
+# name: test_post_synthesis_200
+  'MD5:f7d42ce5787856549abc3d2d7561c06f'
+# ---
@@ -1,5 +1,5 @@
 """
-AudioQuery APIのテスト
+/audio_query API のテスト
 """
 
 from test.utility import round_floats
@@ -8,7 +8,7 @@
 from syrupy.assertion import SnapshotAssertion
 
 
-def test_speakerを指定して音声合成クエリが取得できる(
+def test_post_audio_query_200(
     client: TestClient, snapshot_json: SnapshotAssertion
 ) -> None:
     response = client.post("/audio_query", params={"text": "テストです", "speaker": 0})

@@ -2,10 +2,15 @@
 /frame_synthesis API のテスト
 """
 
+from test.utility import hash_wave_floats_from_wav_bytes
+
 from fastapi.testclient import TestClient
+from syrupy.assertion import SnapshotAssertion
 
 
-def test_post_frame_synthesis_200(client: TestClient) -> None:
+def test_post_frame_synthesis_200(
+    client: TestClient, snapshot: SnapshotAssertion
+) -> None:
     query = {
         "f0": [
             0.0,
@@ -81,3 +86,7 @@ def test_post_frame_synthesis_200(client: TestClient) -> None:
     }
     response = client.post("/frame_synthesis", params={"speaker": 0}, json=query)
     assert response.status_code == 200
+
+    # FileResponse 内の .wav から抽出された音声波形が一致する
+    assert response.headers["content-type"] == "audio/wav"
+    assert snapshot == hash_wave_floats_from_wav_bytes(response.read())
@@ -59,3 +59,14 @@ def test_post_multi_synthesis_200(client: TestClient) -> None:
     ]
     response = client.post("/multi_synthesis", params={"speaker": 0}, json=queries)
     assert response.status_code == 200
+
+    # FileResponse 内の zip ファイルに圧縮された .wav から抽出された音声波形が一致する
+    # FIXME: スナップショットテストを足す
+    # NOTE: ZIP ファイル内の .wav に Linux-Windows 数値精度問題があるため解凍が必要
+    assert response.headers["content-type"] == "application/zip"
+    # from test.utility import summarize_wav_bytes
+    # from syrupy.assertion import SnapshotAssertion
+    # # zip 解凍
+    # wav_summarys = map(lambda wav_byte: summarize_wav_bytes(wav_byte), wav_bytes)
+    # wavs_summary = concatenate_func(wav_summarys)
+    # assert snapshot == wavs_summary
@@ -3,11 +3,13 @@
 """
 
 from test.e2e.single_api.utils import gen_mora
+from test.utility import hash_wave_floats_from_wav_bytes
 
 from fastapi.testclient import TestClient
+from syrupy.assertion import SnapshotAssertion
 
 
-def test_post_synthesis_200(client: TestClient) -> None:
+def test_post_synthesis_200(client: TestClient, snapshot: SnapshotAssertion) -> None:
     query = {
         "accent_phrases": [
             {
@@ -33,3 +35,7 @@ def test_post_synthesis_200(client: TestClient) -> None:
     }
     response = client.post("/synthesis", params={"speaker": 0}, json=query)
     assert response.status_code == 200
+
+    # 音声波形が一致する
+    assert response.headers["content-type"] == "audio/wav"
+    assert snapshot == hash_wave_floats_from_wav_bytes(response.read())
@@ -37,3 +37,10 @@ def test_post_synthesis_morphing_200(client: TestClient) -> None:
         json=queries,
     )
     assert response.status_code == 200
+
+    # FIXME: LinuxとMacOSで計算結果が一致しないためスナップショットテストがコケる（原因不明）
+    # from test.utility import summarize_wav_bytes
+    # from syrupy.assertion import SnapshotAssertion
+    # # FileResponse 内の .wav から抽出された音声波形が一致する
+    # assert response.headers["content-type"] == "audio/wav"
+    # assert snapshot == summarize_wav_bytes(response.read())
@@ -2,10 +2,8 @@
 TTSのテスト
 """
 
-import io
-from test.utility import hash_long_string, round_floats
+from test.utility import hash_wave_floats_from_wav_bytes
 
-import soundfile as sf
 from fastapi.testclient import TestClient
 from syrupy.assertion import SnapshotAssertion
 
@@ -22,17 +20,9 @@ def test_テキストと話者IDから音声を合成できる(
     # AudioQuery から音声波形を生成する
     synthesis_res = client.post("/synthesis", params={"speaker": 0}, json=audio_query)
 
-    # wav ファイルを含む FileResponse から音声波形を抽出する
-    wav_bytes = io.BytesIO(synthesis_res.read())
-    wave = sf.read(wav_bytes)[0].tolist()
-
-    # NOTE: Linux-Windows 数値精度問題に対するワークアラウンド
-    wave = round_floats(wave, 2)
-
     # リクエストが成功している
     assert synthesis_res.status_code == 200
-    # レスポンスが音声ファイルである
+
+    # FileResponse 内の .wav から抽出された音声波形が一致する
     assert synthesis_res.headers["content-type"] == "audio/wav"
-    # 音声波形が commit 間で不変である
-    wave_str = " ".join(map(lambda point: str(point), wave))
-    assert snapshot == hash_long_string(wave_str)
+    assert snapshot == hash_wave_floats_from_wav_bytes(synthesis_res.read())
@@ -1,7 +1,10 @@
 import hashlib
+import io
 import json
 from typing import Any
 
+import numpy as np
+import soundfile as sf
 from pydantic.json import pydantic_encoder
 
 
@@ -36,3 +39,11 @@ def to_hash(value: str) -> str:
         return {k: hash_long_string(v) for k, v in value.items()}
     else:
         return value
+
+
+def hash_wave_floats_from_wav_bytes(wav_bytes: bytes) -> str:
+    """.wavファイルバイト列から音声波形を抽出しハッシュ化する"""
+    wave = sf.read(io.BytesIO(wav_bytes))[0].tolist()
+    # NOTE: Linux-Windows 数値精度問題に対するワークアラウンド
+    wave = round_floats(wave, 2)
+    return "MD5:" + hashlib.md5(np.array(wave).tobytes()).hexdigest()