vllm-project · Isotr0py · Jan 22, 2026 · Jan 20, 2026 · Jan 20, 2026 · Jan 20, 2026
diff --git a/requirements/common.txt b/requirements/common.txt
@@ -32,7 +32,7 @@ pyzmq >= 25.0.0
 msgspec
 gguf >= 0.17.0
 mistral_common[image] >= 1.8.8
-opencv-python-headless >= 4.11.0    # required for video IO
+opencv-python-headless >= 4.13.0    # required for video IO
 pyyaml
 six>=1.16.0; python_version > '3.11' # transitive dependency of pandas that needs to be the latest version for python 3.12
 setuptools>=77.0.3,<81.0.0; python_version > '3.11' # Setuptools is used by triton, we need to ensure a modern version is installed for 3.12+ so that it does not try to import distutils, which was removed in 3.12

diff --git a/requirements/nightly_torch_test.txt b/requirements/nightly_torch_test.txt
@@ -25,7 +25,7 @@ transformers_stream_generator # required for qwen-vl test
 matplotlib # required for qwen-vl test
 mistral_common[image,audio] >= 1.8.8 # required for voxtral test
 num2words # required for smolvlm test
-opencv-python-headless >= 4.11.0 # required for video test
+opencv-python-headless >= 4.13.0 # required for video test
 datamodel_code_generator # required for minicpm3 test
 lm-eval[api]>=0.4.9.2 # required for model evaluation test
 mteb>=1.38.11, <2 # required for mteb test
@@ -37,8 +37,8 @@ bitsandbytes>=0.46.1
 buildkite-test-collector==0.1.9
 
 
-genai_perf==0.0.8
-tritonclient==2.51.0
+genai_perf>=0.0.8
+tritonclient>=2.51.0
 
 numba == 0.61.2 # Required for N-gram speculative decoding
 numpy

diff --git a/requirements/test.in b/requirements/test.in
@@ -33,7 +33,7 @@ matplotlib # required for qwen-vl test
 mistral_common[image,audio] >= 1.8.8 # required for voxtral test
 num2words # required for smolvlm test
 open_clip_torch==2.32.0 # Required for nemotron_vl test, Nemotron Parse in test_common.py
-opencv-python-headless >= 4.11.0 # required for video test
+opencv-python-headless >= 4.13.0 # required for video test
 datamodel_code_generator # required for minicpm3 test
 lm-eval[api]>=0.4.9.2 # required for model evaluation test
 mteb[bm25s]>=2, <3 # required for mteb test
@@ -45,8 +45,8 @@ bitsandbytes==0.46.1
 buildkite-test-collector==0.1.9
 
 
-genai_perf==0.0.8
-tritonclient==2.51.0
+genai_perf>=0.0.8
+tritonclient>=2.51.0
 
 arctic-inference == 0.1.1 # Required for suffix decoding test
 numba == 0.61.2 # Required for N-gram speculative decoding

diff --git a/requirements/test.txt b/requirements/test.txt
@@ -31,7 +31,9 @@ albumentations==1.4.6
     #   -r requirements/test.in
     #   terratorch
 alembic==1.16.4
-    # via mlflow
+    # via
+    #   mlflow
+    #   optuna
 annotated-doc==0.0.4
     # via fastapi
 annotated-types==0.7.0
@@ -145,6 +147,8 @@ colorama==0.4.6
     #   tqdm-multiprocess
 colorful==0.5.6
     # via ray
+colorlog==6.10.1
+    # via optuna
 contourpy==1.3.0
     # via matplotlib
 coverage==7.10.6
@@ -252,7 +256,7 @@ fsspec==2024.9.0
     #   torch
 ftfy==6.3.1
     # via open-clip-torch
-genai-perf==0.0.8
+genai-perf==0.0.16
     # via -r requirements/test.in
 genson==1.3.0
     # via datamodel-code-generator
@@ -389,6 +393,7 @@ jinja2==3.1.6
     # via
     #   datamodel-code-generator
     #   flask
+    #   genai-perf
     #   mlflow
     #   torch
 jiwer==3.0.5
@@ -528,7 +533,7 @@ numba==0.61.2
     #   librosa
 numexpr==2.10.1
     # via lm-eval
-numpy==1.26.4
+numpy==2.2.6
     # via
     #   -r requirements/test.in
     #   accelerate
@@ -558,6 +563,7 @@ numpy==1.26.4
     #   numba
     #   numexpr
     #   opencv-python-headless
+    #   optuna
     #   pandas
     #   patsy
     #   peft
@@ -637,7 +643,7 @@ opencensus==0.11.4
     # via ray
 opencensus-context==0.1.3
     # via opencensus
-opencv-python-headless==4.11.0.86
+opencv-python-headless==4.13.0.90
     # via
     #   -r requirements/test.in
     #   albucore
@@ -660,6 +666,10 @@ opentelemetry-sdk==1.35.0
     #   ray
 opentelemetry-semantic-conventions==0.56b0
     # via opentelemetry-sdk
+optuna==3.6.1
+    # via genai-perf
+orjson==3.11.5
+    # via genai-perf
 packaging==24.2
     # via
     #   accelerate
@@ -678,6 +688,7 @@ packaging==24.2
     #   lightning-utilities
     #   matplotlib
     #   mlflow-skinny
+    #   optuna
     #   peft
     #   plotly
     #   pooch
@@ -717,6 +728,8 @@ peft==0.16.0
     #   lm-eval
 perceptron==0.1.4
     # via -r requirements/test.in
+perf-analyzer==0.1.0
+    # via genai-perf
 pillow==10.4.0
     # via
     #   genai-perf
@@ -903,6 +916,7 @@ pyyaml==6.0.2
     #   lightning
     #   mlflow-skinny
     #   omegaconf
+    #   optuna
     #   peft
     #   pytorch-lightning
     #   ray
@@ -1065,6 +1079,7 @@ sortedcontainers==2.4.0
 soundfile==0.12.1
     # via
     #   -r requirements/test.in
+    #   genai-perf
     #   librosa
     #   mistral-common
 soxr==0.5.0.post1
@@ -1075,6 +1090,7 @@ sqlalchemy==2.0.41
     # via
     #   alembic
     #   mlflow
+    #   optuna
 sqlitedict==2.1.0
     # via lm-eval
 sqlparse==0.5.3
@@ -1204,6 +1220,7 @@ tqdm==4.66.6
     #   mteb
     #   nltk
     #   open-clip-torch
+    #   optuna
     #   peft
     #   pqdm
     #   pretrainedmodels
@@ -1226,10 +1243,8 @@ transformers-stream-generator==0.0.5
     # via -r requirements/test.in
 triton==3.5.1
     # via torch
-tritonclient==2.51.0
-    # via
-    #   -r requirements/test.in
-    #   genai-perf
+tritonclient==2.64.0
+    # via -r requirements/test.in
 typepy==1.3.2
     # via
     #   dataproperty

@@ -267,12 +267,16 @@ async def test_audio_with_max_tokens(mary_had_lamb, client_and_model):
     out_tokens = tok(out_text, add_special_tokens=False)["input_ids"]
     assert len(out_tokens) == 1
     # max_completion_tokens > max_model_len
+    # (max_model_len is 32768 for Gemma-3n-E2B-it, so the 1e6 requested below exceeds it)
     transcription = await client.audio.transcriptions.create(
         model=model_name,
         file=mary_had_lamb,
         response_format="text",
         temperature=0.0,
-        extra_body={"max_completion_tokens": int(1e6)},
+        extra_body={
+            "max_completion_tokens": int(1e6),
+            "repetition_penalty": 1.3,
+        },
     )
     out = json.loads(transcription)
     out_text = out["text"]