Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion requirements/common.txt
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ pyzmq >= 25.0.0
msgspec
gguf >= 0.17.0
mistral_common[image] >= 1.8.8
opencv-python-headless >= 4.11.0 # required for video IO
opencv-python-headless >= 4.13.0 # required for video IO
pyyaml
six>=1.16.0; python_version > '3.11' # transitive dependency of pandas that needs to be the latest version for python 3.12
setuptools>=77.0.3,<81.0.0; python_version > '3.11' # Setuptools is used by triton, we need to ensure a modern version is installed for 3.12+ so that it does not try to import distutils, which was removed in 3.12
Expand Down
6 changes: 3 additions & 3 deletions requirements/nightly_torch_test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ transformers_stream_generator # required for qwen-vl test
matplotlib # required for qwen-vl test
mistral_common[image,audio] >= 1.8.8 # required for voxtral test
num2words # required for smolvlm test
opencv-python-headless >= 4.11.0 # required for video test
opencv-python-headless >= 4.13.0 # required for video test
datamodel_code_generator # required for minicpm3 test
lm-eval[api]>=0.4.9.2 # required for model evaluation test
mteb>=1.38.11, <2 # required for mteb test
Expand All @@ -37,8 +37,8 @@ bitsandbytes>=0.46.1
buildkite-test-collector==0.1.9


genai_perf==0.0.8
tritonclient==2.51.0
genai_perf>=0.0.8
tritonclient>=2.51.0

numba == 0.61.2 # Required for N-gram speculative decoding
numpy
Expand Down
6 changes: 3 additions & 3 deletions requirements/test.in
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ matplotlib # required for qwen-vl test
mistral_common[image,audio] >= 1.8.8 # required for voxtral test
num2words # required for smolvlm test
open_clip_torch==2.32.0 # Required for nemotron_vl test, Nemotron Parse in test_common.py
opencv-python-headless >= 4.11.0 # required for video test
opencv-python-headless >= 4.13.0 # required for video test
datamodel_code_generator # required for minicpm3 test
lm-eval[api]>=0.4.9.2 # required for model evaluation test
mteb[bm25s]>=2, <3 # required for mteb test
Expand All @@ -45,8 +45,8 @@ bitsandbytes==0.46.1
buildkite-test-collector==0.1.9


genai_perf==0.0.8
tritonclient==2.51.0
genai_perf>=0.0.8
tritonclient>=2.51.0

arctic-inference == 0.1.1 # Required for suffix decoding test
numba == 0.61.2 # Required for N-gram speculative decoding
Expand Down
31 changes: 23 additions & 8 deletions requirements/test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@ albumentations==1.4.6
# -r requirements/test.in
# terratorch
alembic==1.16.4
# via mlflow
# via
# mlflow
# optuna
annotated-doc==0.0.4
# via fastapi
annotated-types==0.7.0
Expand Down Expand Up @@ -145,6 +147,8 @@ colorama==0.4.6
# tqdm-multiprocess
colorful==0.5.6
# via ray
colorlog==6.10.1
# via optuna
contourpy==1.3.0
# via matplotlib
coverage==7.10.6
Expand Down Expand Up @@ -252,7 +256,7 @@ fsspec==2024.9.0
# torch
ftfy==6.3.1
# via open-clip-torch
genai-perf==0.0.8
genai-perf==0.0.16
# via -r requirements/test.in
genson==1.3.0
# via datamodel-code-generator
Expand Down Expand Up @@ -389,6 +393,7 @@ jinja2==3.1.6
# via
# datamodel-code-generator
# flask
# genai-perf
# mlflow
# torch
jiwer==3.0.5
Expand Down Expand Up @@ -528,7 +533,7 @@ numba==0.61.2
# librosa
numexpr==2.10.1
# via lm-eval
numpy==1.26.4
numpy==2.2.6
# via
# -r requirements/test.in
# accelerate
Expand Down Expand Up @@ -558,6 +563,7 @@ numpy==1.26.4
# numba
# numexpr
# opencv-python-headless
# optuna
# pandas
# patsy
# peft
Expand Down Expand Up @@ -637,7 +643,7 @@ opencensus==0.11.4
# via ray
opencensus-context==0.1.3
# via opencensus
opencv-python-headless==4.11.0.86
opencv-python-headless==4.13.0.90
# via
# -r requirements/test.in
# albucore
Expand All @@ -660,6 +666,10 @@ opentelemetry-sdk==1.35.0
# ray
opentelemetry-semantic-conventions==0.56b0
# via opentelemetry-sdk
optuna==3.6.1
# via genai-perf
orjson==3.11.5
# via genai-perf
packaging==24.2
# via
# accelerate
Expand All @@ -678,6 +688,7 @@ packaging==24.2
# lightning-utilities
# matplotlib
# mlflow-skinny
# optuna
# peft
# plotly
# pooch
Expand Down Expand Up @@ -717,6 +728,8 @@ peft==0.16.0
# lm-eval
perceptron==0.1.4
# via -r requirements/test.in
perf-analyzer==0.1.0
# via genai-perf
pillow==10.4.0
# via
# genai-perf
Expand Down Expand Up @@ -903,6 +916,7 @@ pyyaml==6.0.2
# lightning
# mlflow-skinny
# omegaconf
# optuna
# peft
# pytorch-lightning
# ray
Expand Down Expand Up @@ -1065,6 +1079,7 @@ sortedcontainers==2.4.0
soundfile==0.12.1
# via
# -r requirements/test.in
# genai-perf
# librosa
# mistral-common
soxr==0.5.0.post1
Expand All @@ -1075,6 +1090,7 @@ sqlalchemy==2.0.41
# via
# alembic
# mlflow
# optuna
sqlitedict==2.1.0
# via lm-eval
sqlparse==0.5.3
Expand Down Expand Up @@ -1204,6 +1220,7 @@ tqdm==4.66.6
# mteb
# nltk
# open-clip-torch
# optuna
# peft
# pqdm
# pretrainedmodels
Expand All @@ -1226,10 +1243,8 @@ transformers-stream-generator==0.0.5
# via -r requirements/test.in
triton==3.5.1
# via torch
tritonclient==2.51.0
# via
# -r requirements/test.in
# genai-perf
tritonclient==2.64.0
# via -r requirements/test.in
typepy==1.3.2
# via
# dataproperty
Expand Down
6 changes: 5 additions & 1 deletion tests/entrypoints/openai/test_translation_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -267,12 +267,16 @@ async def test_audio_with_max_tokens(mary_had_lamb, client_and_model):
out_tokens = tok(out_text, add_special_tokens=False)["input_ids"]
assert len(out_tokens) == 1
# max_completion_tokens > max_model_len
# max_model_len=32768 for Gemma-3n-E2B-it
transcription = await client.audio.transcriptions.create(
model=model_name,
file=mary_had_lamb,
response_format="text",
temperature=0.0,
extra_body={"max_completion_tokens": int(1e6)},
extra_body={
"max_completion_tokens": int(1e6),
"repetition_penalty": 1.3,
},
)
out = json.loads(transcription)
out_text = out["text"]
Expand Down