livekit · davidzhao · Oct 12, 2025 · Oct 12, 2025 · Oct 12, 2025
diff --git a/livekit-plugins/livekit-plugins-google/livekit/plugins/google/models.py b/livekit-plugins/livekit-plugins-google/livekit/plugins/google/models.py
@@ -10,6 +10,7 @@
     "medical_conversation",
     "chirp",
     "chirp_2",
+    "chirp_3",
     "latest_long",
     "latest_short",
 ]

diff --git a/livekit-plugins/livekit-plugins-google/livekit/plugins/google/stt.py b/livekit-plugins/livekit-plugins-google/livekit/plugins/google/stt.py
@@ -618,17 +618,28 @@ def _streaming_recognize_response_to_speech_data(
 ) -> stt.SpeechData | None:
     text = ""
     confidence = 0.0
+    final_result = None
     for result in resp.results:
         if len(result.alternatives) == 0:
             continue
-        text += result.alternatives[0].transcript
-        confidence += result.alternatives[0].confidence
-
-    confidence /= len(resp.results)
-    lg = resp.results[0].language_code
+        else:
+            if result.is_final:
+                final_result = result
+                break
+            else:
+                text += result.alternatives[0].transcript
+                confidence += result.alternatives[0].confidence
+
+    if final_result is not None:
+        text = final_result.alternatives[0].transcript
+        confidence = final_result.alternatives[0].confidence
+        lg = final_result.language_code
+    else:
+        confidence /= len(resp.results)
+        if confidence < min_confidence_threshold:
+            return None
+        lg = resp.results[0].language_code
 
-    if confidence < min_confidence_threshold:
-        return None
     if text == "":
         return None
 

diff --git a/tests/test_plugin_google_stt.py b/tests/test_plugin_google_stt.py
@@ -0,0 +1,106 @@
+from google.cloud.speech_v2.types import cloud_speech
+
+from livekit.agents.stt import SpeechData
+from livekit.plugins.google.stt import (
+    _streaming_recognize_response_to_speech_data,  # pyright: ignore[reportPrivateUsage]
+)
+
+
+async def test_streaming_recognize_response_to_speech_data_01():  # default-built result
+    srr = cloud_speech.StreamingRecognizeResponse(
+        results=[cloud_speech.StreamingRecognitionResult()]  # no fields set on the result
+    )
+    assert _streaming_recognize_response_to_speech_data(srr, min_confidence_threshold=1.0) is None
+
+
+async def test_streaming_recognize_response_to_speech_data_02():  # one final result
+    srr = cloud_speech.StreamingRecognizeResponse(
+        results=[
+            cloud_speech.StreamingRecognitionResult(
+                alternatives=[
+                    cloud_speech.SpeechRecognitionAlternative(confidence=0.0, transcript="test")
+                ],
+                is_final=True,
+                language_code="te-ST",
+            )
+        ]
+    )
+    result = _streaming_recognize_response_to_speech_data(srr, min_confidence_threshold=0.5)
+    assert type(result) is SpeechData  # returned even though 0.0 is below the 0.5 threshold
+    assert result.text == "test"
+    assert result.language == "te-ST"
+    assert result.confidence == 0.0  # same value as the final alternative's confidence
+
+
+async def test_streaming_recognize_response_to_speech_data_03():  # one non-final result
+    srr = cloud_speech.StreamingRecognizeResponse(
+        results=[
+            cloud_speech.StreamingRecognitionResult(
+                alternatives=[
+                    cloud_speech.SpeechRecognitionAlternative(confidence=0.0, transcript="test")
+                ],
+                is_final=False,
+            )
+        ]
+    )
+    result = _streaming_recognize_response_to_speech_data(srr, min_confidence_threshold=0.5)
+    assert result is None  # non-final input with confidence 0.0 under the 0.5 threshold
+
+
+async def test_streaming_recognize_response_to_speech_data_04():  # two non-final results
+    srr = cloud_speech.StreamingRecognizeResponse(
+        results=[
+            cloud_speech.StreamingRecognitionResult(
+                alternatives=[
+                    cloud_speech.SpeechRecognitionAlternative(confidence=1.0, transcript="test01")
+                ],
+                is_final=False,
+                language_code="te-ST",
+            ),
+            cloud_speech.StreamingRecognitionResult(
+                alternatives=[
+                    cloud_speech.SpeechRecognitionAlternative(confidence=1.0, transcript="test02")
+                ],
+                is_final=False,
+                language_code="te-ST",
+            ),
+        ]
+    )
+    result = _streaming_recognize_response_to_speech_data(srr, min_confidence_threshold=0.5)
+    assert type(result) is SpeechData
+    assert result.text == "test01test02"  # both transcripts, in input order, no separator
+    assert result.language == "te-ST"
+    assert result.confidence == 1.0  # both input alternatives carry confidence 1.0
+
+
+async def test_streaming_recognize_response_to_speech_data_05():  # non-final then final
+    srr = cloud_speech.StreamingRecognizeResponse(
+        results=[
+            cloud_speech.StreamingRecognitionResult(
+                alternatives=[
+                    cloud_speech.SpeechRecognitionAlternative(confidence=1.0, transcript="test01")
+                ],
+                is_final=False,
+                language_code="te-ST",
+            ),
+            cloud_speech.StreamingRecognitionResult(
+                alternatives=[
+                    cloud_speech.SpeechRecognitionAlternative(confidence=1.0, transcript="test02")
+                ],
+                is_final=False,
+                language_code="te-ST",
+            ),
+            cloud_speech.StreamingRecognitionResult(
+                alternatives=[
+                    cloud_speech.SpeechRecognitionAlternative(confidence=1.0, transcript="best")
+                ],
+                is_final=True,
+                language_code="te-ST",
+            ),
+        ]
+    )
+    result = _streaming_recognize_response_to_speech_data(srr, min_confidence_threshold=0.5)
+    assert type(result) is SpeechData
+    assert result.text == "best"  # only the is_final transcript; "test01"/"test02" are absent
+    assert result.language == "te-ST"
+    assert result.confidence == 1.0