chore: sync sdk code with DeepLearning repo (#149)

he-james · arawka-aai · web-flow · commit d7893e200680 · 2025-12-01T11:25:33.000-06:00
Co-authored-by: AssemblyAI <engineering.sdk@assemblyai.com> and Abhi Rawka <arawka@assemblyai.com>
diff --git a/assemblyai/__version__.py b/assemblyai/__version__.py
@@ -1 +1 @@
-__version__ = "0.46.0"
+__version__ = "0.48.0"
diff --git a/assemblyai/types.py b/assemblyai/types.py
@@ -507,6 +507,10 @@ class LanguageDetectionOptions(BaseModel):
         None,
         description="The confidence threshold for code switching detection. Valid values are in the range [0,1] inclusive.",
     )
+    on_low_language_confidence: Optional[str] = Field(
+        None,
+        description='Controls behavior when language confidence is below threshold. Either "error" (default) or "fallback".',
+    )
 
 
 class CodeSwitchingLanguage(BaseModel):
@@ -681,6 +685,10 @@ class SpeakerOptions(BaseModel):
     max_speakers_expected: Optional[int] = Field(
         None, ge=1, description="Maximum number of speakers expected in the audio"
     )
+    use_two_stage_clustering: Optional[bool] = Field(
+        None,
+        description="Enable or disable two-stage clustering for speaker diarization",
+    )
 
     if pydantic_v2:
 
@@ -1702,6 +1710,7 @@ def set_language_detection(
         confidence_threshold: Optional[float] = None,
         expected_languages: Optional[List[str]] = None,
         fallback_language: Optional[str] = None,
+        on_low_language_confidence: Optional[str] = None,
     ) -> Self:
         """
         Enable Automatic Language Detection with optional configuration.
@@ -1711,6 +1720,7 @@ def set_language_detection(
             confidence_threshold: The confidence threshold that must be reached.
             expected_languages: A list of languages that the audio could be expected to be.
             fallback_language: The language to fallback to if detection fails.
+            on_low_language_confidence: Controls behavior when language confidence is below threshold. Either "error" (default) or "fallback".
         """
 
         if not enable:
@@ -1724,11 +1734,12 @@ def set_language_detection(
             confidence_threshold
         )
 
-        if expected_languages or fallback_language:
+        if expected_languages or fallback_language or on_low_language_confidence:
             self._raw_transcription_config.language_detection_options = (
                 LanguageDetectionOptions(
                     expected_languages=expected_languages,
                     fallback_language=fallback_language,
+                    on_low_language_confidence=on_low_language_confidence,
                 )
             )
 
diff --git a/tests/unit/test_language_detection_options.py b/tests/unit/test_language_detection_options.py
@@ -127,3 +127,68 @@ def test_set_language_detection_disable():
     assert config.language_detection is None
     assert config.language_confidence_threshold is None
     assert config.language_detection_options is None
+
+
+def test_language_detection_options_with_on_low_language_confidence():
+    """Test that LanguageDetectionOptions accepts on_low_language_confidence parameter."""
+    options = aai.LanguageDetectionOptions(
+        expected_languages=["en", "es"],
+        fallback_language="en",
+        on_low_language_confidence="fallback",
+    )
+    assert options.expected_languages == ["en", "es"]
+    assert options.fallback_language == "en"
+    assert options.on_low_language_confidence == "fallback"
+
+
+def test_language_detection_options_on_low_confidence_only():
+    """Test that LanguageDetectionOptions can be created with only on_low_language_confidence."""
+    options = aai.LanguageDetectionOptions(on_low_language_confidence="error")
+    assert options.expected_languages is None
+    assert options.fallback_language is None
+    assert options.on_low_language_confidence == "error"
+
+
+def test_set_language_detection_with_on_low_confidence():
+    """Test the set_language_detection method with on_low_language_confidence."""
+    config = aai.TranscriptionConfig().set_language_detection(
+        confidence_threshold=0.8,
+        expected_languages=["en", "fr"],
+        fallback_language="en",
+        on_low_language_confidence="fallback",
+    )
+
+    assert config.language_detection is True
+    assert config.language_confidence_threshold == 0.8
+    assert config.language_detection_options.expected_languages == ["en", "fr"]
+    assert config.language_detection_options.fallback_language == "en"
+    assert config.language_detection_options.on_low_language_confidence == "fallback"
+
+
+def test_set_language_detection_on_low_confidence_only():
+    """Test set_language_detection with only on_low_language_confidence parameter."""
+    config = aai.TranscriptionConfig().set_language_detection(
+        on_low_language_confidence="error"
+    )
+
+    assert config.language_detection is True
+    assert config.language_detection_options is not None
+    assert config.language_detection_options.on_low_language_confidence == "error"
+
+
+def test_transcription_config_with_on_low_confidence_in_options():
+    """Test that TranscriptionConfig properly handles on_low_language_confidence in options."""
+    options = aai.LanguageDetectionOptions(
+        fallback_language="en", on_low_language_confidence="fallback"
+    )
+
+    config = aai.TranscriptionConfig(
+        language_detection=True,
+        language_confidence_threshold=0.9,
+        language_detection_options=options,
+    )
+
+    assert config.language_detection is True
+    assert config.language_confidence_threshold == 0.9
+    assert config.language_detection_options.fallback_language == "en"
+    assert config.language_detection_options.on_low_language_confidence == "fallback"
diff --git a/tests/unit/test_speaker_options.py b/tests/unit/test_speaker_options.py
@@ -94,3 +94,36 @@ def test_speaker_options_in_raw_config():
     config = aai.TranscriptionConfig(speaker_options=speaker_options)
 
     assert config.raw.speaker_options == speaker_options
+
+
+def test_speaker_options_with_two_stage_clustering():
+    """Test that SpeakerOptions can be created with use_two_stage_clustering parameter."""
+    speaker_options = aai.SpeakerOptions(
+        min_speakers_expected=2,
+        max_speakers_expected=5,
+        use_two_stage_clustering=False,
+    )
+    assert speaker_options.min_speakers_expected == 2
+    assert speaker_options.max_speakers_expected == 5
+    assert speaker_options.use_two_stage_clustering is False
+
+
+def test_speaker_options_two_stage_clustering_true():
+    """Test that use_two_stage_clustering can be set to True."""
+    speaker_options = aai.SpeakerOptions(use_two_stage_clustering=True)
+    assert speaker_options.use_two_stage_clustering is True
+
+
+def test_transcription_config_with_two_stage_clustering():
+    """Test that TranscriptionConfig accepts speaker_options with use_two_stage_clustering."""
+    speaker_options = aai.SpeakerOptions(
+        min_speakers_expected=2, max_speakers_expected=4, use_two_stage_clustering=False
+    )
+
+    config = aai.TranscriptionConfig(
+        speaker_labels=True, speaker_options=speaker_options
+    )
+
+    assert config.speaker_labels is True
+    assert config.speaker_options == speaker_options
+    assert config.speaker_options.use_two_stage_clustering is False

Original file line number	Diff line number	Diff line change
`@@ -1 +1 @@`
`1`		`-__version__ = "0.46.0"`
	`1`	`+__version__ = "0.48.0"`