vllm-project · DarkLight1337 · Feb 11, 2026 · Jan 29, 2026 · Jan 29, 2026 · Jan 29, 2026
diff --git a/docs/models/supported_models.md b/docs/models/supported_models.md
@@ -790,6 +790,7 @@ Speech2Text models trained specifically for Automatic Speech Recognition.
 
 | Architecture | Models | Example HF Models | [LoRA](../features/lora.md) | [PP](../serving/parallelism_scaling.md) |
 |--------------|--------|-------------------|----------------------|---------------------------|
+| `FunASRForConditionalGeneration` | FunASR | `allendou/Fun-ASR-Nano-2512-vllm`, etc. | | |
 | `Gemma3nForConditionalGeneration` | Gemma3n | `google/gemma-3n-E2B-it`, `google/gemma-3n-E4B-it`, etc. | | |
 | `GlmAsrForConditionalGeneration` | GLM-ASR | `zai-org/GLM-ASR-Nano-2512` | ✅︎ | ✅︎ |
 | `GraniteSpeechForConditionalGeneration` | Granite Speech | `ibm-granite/granite-speech-3.3-2b`, `ibm-granite/granite-speech-3.3-8b`, etc. | ✅︎ | ✅︎ |

diff --git a/examples/online_serving/openai_transcription_client.py b/examples/online_serving/openai_transcription_client.py
@@ -26,7 +26,9 @@
 from vllm.assets.audio import AudioAsset
 
 
-def sync_openai(audio_path: str, client: OpenAI, model: str):
+def sync_openai(
+    audio_path: str, client: OpenAI, model: str, *, repetition_penalty: float = 1.3
+):
     """
     Perform synchronous transcription using OpenAI-compatible API.
     """
@@ -40,7 +42,7 @@ def sync_openai(audio_path: str, client: OpenAI, model: str):
             # Additional sampling params not provided by OpenAI API.
             extra_body=dict(
                 seed=4419,
-                repetition_penalty=1.3,
+                repetition_penalty=repetition_penalty,
             ),
         )
         print("transcription result [sync]:", transcription.text)
@@ -129,7 +131,12 @@ def main(args):
     print(f"Using model: {model}")
 
     # Run the synchronous function
-    sync_openai(args.audio_path if args.audio_path else mary_had_lamb, client, model)
+    sync_openai(
+        audio_path=args.audio_path if args.audio_path else mary_had_lamb,
+        client=client,
+        model=model,
+        repetition_penalty=args.repetition_penalty,
+    )
 
     # Run the asynchronous function
     if "openai" in model:
@@ -161,5 +168,11 @@ def main(args):
         default=None,
         help="The path to the audio file to transcribe.",
     )
+    parser.add_argument(
+        "--repetition_penalty",
+        type=float,
+        default=1.3,
+        help="repetition penalty",
+    )
     args = parser.parse_args()
     main(args)

diff --git a/tests/models/registry.py b/tests/models/registry.py
@@ -713,6 +713,10 @@ def check_available_online(
         "baidu/ERNIE-4.5-VL-28B-A3B-PT",
         trust_remote_code=True,
     ),
+    "FunASRForConditionalGeneration": _HfExamplesInfo(
+        "allendou/Fun-ASR-Nano-2512-vllm",
+        is_available_online=False,
+    ),
     "FunAudioChatForConditionalGeneration": _HfExamplesInfo(
         "funaudiochat", is_available_online=False
     ),