diff --git a/vllm_omni/entrypoints/openai/serving_speech.py b/vllm_omni/entrypoints/openai/serving_speech.py
index 10c5fdacc59..3a2566584cb 100644
--- a/vllm_omni/entrypoints/openai/serving_speech.py
+++ b/vllm_omni/entrypoints/openai/serving_speech.py
@@ -1183,6 +1183,29 @@ def _build_voxtral_prompt(self, request: OpenAICreateSpeechRequest) -> dict[str,
             mistral_tokenizer = cached_tokenizer_from_config(self.engine_client.model_config)
             self._tts_tokenizer = mistral_tokenizer.instruct
         if voice is not None:
+            # For custom uploaded voices, mistral_common doesn't know the voice name.
+            # Resolve to reference audio data stored at upload time instead.
+            voice_lower = voice.lower()
+            if voice_lower in self.uploaded_speakers:
+                speaker_info = self.uploaded_speakers[voice_lower]
+                file_path = Path(speaker_info["file_path"])
+                if file_path.exists():
+                    with open(file_path, "rb") as f:
+                        audio_bytes = f.read()
+                    audio_b64 = base64.b64encode(audio_bytes).decode("utf-8")
+                    mime_type = speaker_info.get("mime_type", "audio/wav")
+                    ref_audio = f"data:{mime_type};base64,{audio_b64}"
+                    # Strip data URI prefix for mistral_common
+                    _, _, ref_audio = ref_audio.partition(",")
+                    tokenized = self._tts_tokenizer.encode_speech_request(
+                        SpeechRequest(input=text, ref_audio=ref_audio)
+                    )
+                    audio = tokenized.audios[0]
+                    return {
+                        "prompt_token_ids": tokenized.tokens,
+                        "multi_modal_data": {"audio": [(audio.audio_array, audio.sampling_rate)]},
+                    }
+                # Fall through to voice-name path if file is missing
             tokens = self._tts_tokenizer.encode_speech_request(SpeechRequest(input=text, voice=voice)).tokens
             return {
                 "prompt_token_ids": tokens,