vllm-project · ericcurtin · Mar 19, 2026 · Mar 17, 2026 · Mar 18, 2026 · Mar 18, 2026
diff --git a/tests/test_qwen3_asr.py b/tests/test_qwen3_asr.py
@@ -12,6 +12,7 @@
 import pytest
 
 from vllm_metal.stt.config import is_stt_model
+from vllm_metal.stt.loader import load_model
 from vllm_metal.stt.qwen3_asr.config import (
     Qwen3ASRAudioConfig,
     Qwen3ASRConfig,
@@ -23,7 +24,7 @@
     Qwen3Attention,
     Qwen3LM,
 )
-from vllm_metal.stt.transcribe import Qwen3ASRTranscriber, load_model
+from vllm_metal.stt.qwen3_asr.transcriber import Qwen3ASRTranscriber
 
 # ===========================================================================
 # Configuration

diff --git a/tests/test_transcribe.py b/tests/test_transcribe.py
@@ -10,11 +10,9 @@
 import mlx.core as mx
 import pytest
 
-from vllm_metal.stt.transcribe import (
-    TranscriptionResult,
-    WhisperTranscriber,
-    load_model,
-)
+from vllm_metal.stt.loader import load_model
+from vllm_metal.stt.protocol import TranscriptionResult
+from vllm_metal.stt.whisper import WhisperTranscriber
 from vllm_metal.stt.whisper.transcriber import MAX_PROMPT_TOKENS
 
 # ===========================================================================
@@ -370,11 +368,12 @@ def test_transcribe_silence(self) -> None:
         import mlx.core as mx
 
         from vllm_metal.stt.audio import SAMPLE_RATE
-        from vllm_metal.stt.transcribe import load_model, transcribe
+        from vllm_metal.stt.loader import load_model
+        from vllm_metal.stt.whisper import WhisperTranscriber
 
         model = load_model("openai/whisper-tiny")
         audio = mx.zeros(SAMPLE_RATE * 3)  # 3s silence
-        result = transcribe(model, audio)
+        result = WhisperTranscriber(model).transcribe(audio)
         # Whisper may hallucinate on silence; just check it doesn't crash
         assert isinstance(result.text, str)
 
@@ -383,9 +382,10 @@ def test_transcribe_with_timestamps(self) -> None:
         import mlx.core as mx
 
         from vllm_metal.stt.audio import SAMPLE_RATE
-        from vllm_metal.stt.transcribe import load_model, transcribe
+        from vllm_metal.stt.loader import load_model
+        from vllm_metal.stt.whisper import WhisperTranscriber
 
         model = load_model("openai/whisper-tiny")
         audio = mx.zeros(SAMPLE_RATE * 3)
-        result = transcribe(model, audio, with_timestamps=True)
+        result = WhisperTranscriber(model).transcribe(audio, with_timestamps=True)
         assert result.duration == pytest.approx(3.0)