From 09d4be0211c86c24128f85bdca0ffeb6bfe8b0f1 Mon Sep 17 00:00:00 2001 From: "rshaw@neuralmagic.com" Date: Sat, 28 Dec 2024 21:51:33 +0000 Subject: [PATCH 1/4] updated dependencies --- requirements-test.in | 2 +- requirements-test.txt | 122 +++++++++++++++++++++--------------------- 2 files changed, 63 insertions(+), 61 deletions(-) diff --git a/requirements-test.in b/requirements-test.in index fb4179c3d842..91aa7f473da0 100644 --- a/requirements-test.in +++ b/requirements-test.in @@ -22,7 +22,7 @@ transformers_stream_generator # required for qwen-vl test matplotlib # required for qwen-vl test mistral_common[opencv] >= 1.5.0 # required for pixtral test datamodel_code_generator # required for minicpm3 test -lm-eval[api]==0.4.4 # required for model evaluation test +lm-eval[api] @ git+https://github.com/robertgshaw2-neuralmagic/lm-evaluation-harness.git@streaming-api # required for model evaluation test with streaming # quantization bitsandbytes>=0.45.0 diff --git a/requirements-test.txt b/requirements-test.txt index 3771577fe8ed..6d563efffcd1 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -2,32 +2,32 @@ # This file is autogenerated by pip-compile with Python 3.12 # by the following command: # -# python3.12 -m piptools compile requirements-test.in -o requirements-test.txt +# pip-compile --output-file=requirements-test.txt requirements-test.in # absl-py==2.1.0 # via rouge-score -accelerate==1.0.1 +accelerate==1.2.1 # via # lm-eval # peft -aiohappyeyeballs==2.4.3 +aiohappyeyeballs==2.4.4 # via aiohttp -aiohttp==3.10.10 +aiohttp==3.11.11 # via # datasets # fsspec # lm-eval -aiosignal==1.3.1 +aiosignal==1.3.2 # via # aiohttp # ray annotated-types==0.7.0 # via pydantic -anyio==4.6.2.post1 +anyio==4.7.0 # via httpx -argcomplete==3.5.1 +argcomplete==3.5.2 # via datamodel-code-generator -attrs==24.2.0 +attrs==24.3.0 # via # aiohttp # jsonlines @@ -35,22 +35,22 @@ attrs==24.2.0 # referencing audioread==3.0.1 # via librosa -awscli==1.35.23 +awscli==1.36.31 # via -r requirements-test.in -bitsandbytes>=0.45.0 +bitsandbytes==0.45.0 # via -r requirements-test.in black==24.10.0 # via datamodel-code-generator -boto3==1.35.57 +boto3==1.35.90 # via tensorizer -botocore==1.35.57 +botocore==1.35.90 # via # awscli # boto3 # s3transfer buildkite-test-collector==0.1.9 # via -r requirements-test.in -certifi==2024.8.30 +certifi==2024.12.14 # via # httpcore # httpx @@ -59,9 +59,9 @@ cffi==1.17.1 # via soundfile chardet==5.2.0 # via mbstrdecoder -charset-normalizer==3.4.0 +charset-normalizer==3.4.1 # via requests -click==8.1.7 +click==8.1.8 # via # black # nltk @@ -71,19 +71,19 @@ colorama==0.4.6 # awscli # sacrebleu # tqdm-multiprocess -contourpy==1.3.0 +contourpy==1.3.1 # via matplotlib cupy-cuda12x==13.3.0 # via ray cycler==0.12.1 # via matplotlib -datamodel-code-generator==0.26.3 +datamodel-code-generator==0.26.4 # via -r requirements-test.in dataproperty==1.0.1 # via # pytablewriter # tabledata -datasets==3.0.2 +datasets==3.2.0 # via # evaluate # lm-eval @@ -107,7 +107,7 @@ email-validator==2.2.0 # via pydantic evaluate==0.4.3 # via lm-eval -fastrlock==0.8.2 +fastrlock==0.8.3 # via cupy-cuda12x filelock==3.16.1 # via @@ -117,7 +117,7 @@ filelock==3.16.1 # torch # transformers # triton -fonttools==4.54.1 +fonttools==4.55.3 # via matplotlib frozenlist==1.5.0 # via @@ -134,13 +134,13 @@ genson==1.3.0 # via datamodel-code-generator h11==0.14.0 # via httpcore -hiredis==3.0.0 +hiredis==3.1.0 # via tensorizer -httpcore==1.0.6 +httpcore==1.0.7 # via httpx -httpx==0.27.2 +httpx==0.28.1 # via -r requirements-test.in -huggingface-hub==0.26.2 +huggingface-hub==0.27.0 # via # accelerate # datasets @@ -163,7 +163,7 @@ iniconfig==2.0.0 # via pytest isort==5.13.2 # via datamodel-code-generator -jinja2==3.1.4 +jinja2==3.1.5 # via # datamodel-code-generator # torch @@ -184,7 +184,7 @@ jsonschema==4.23.0 # ray jsonschema-specifications==2024.10.1 # via jsonschema -kiwisolver==1.4.7 +kiwisolver==1.4.8 # via matplotlib lazy-loader==0.4 # via librosa @@ -194,13 +194,13 @@ librosa==0.10.2.post1 # via -r requirements-test.in llvmlite==0.43.0 # via numba -lm-eval[api]==0.4.4 +lm-eval[api] @ git+https://github.com/robertgshaw2-neuralmagic/lm-evaluation-harness.git@streaming-api # via -r requirements-test.in lxml==5.3.0 # via sacrebleu markupsafe==3.0.2 # via jinja2 -matplotlib==3.9.2 +matplotlib==3.10.0 # via -r requirements-test.in mbstrdecoder==1.1.3 # via @@ -229,13 +229,13 @@ multiprocess==0.70.16 # evaluate mypy-extensions==1.0.0 # via black -networkx==3.2.1 +networkx==3.4.2 # via torch nltk==3.9.1 # via rouge-score numba==0.60.0 # via librosa -numexpr==2.10.1 +numexpr==2.10.2 # via lm-eval numpy==1.26.4 # via @@ -297,7 +297,7 @@ nvidia-nvtx-cu12==12.4.127 # via torch opencv-python-headless==4.10.0.84 # via mistral-common -packaging==24.1 +packaging==24.2 # via # accelerate # black @@ -322,7 +322,7 @@ pathspec==0.12.1 # via black pathvalidate==3.2.1 # via pytablewriter -peft==0.13.2 +peft==0.14.0 # via # -r requirements-test.in # lm-eval @@ -340,22 +340,24 @@ pluggy==1.5.0 # via pytest pooch==1.8.2 # via librosa -portalocker==2.10.1 +portalocker==3.0.0 # via sacrebleu -propcache==0.2.0 - # via yarl -protobuf==5.28.3 +propcache==0.2.1 + # via + # aiohttp + # yarl +protobuf==5.29.2 # via # ray # tensorizer -psutil==6.1.0 +psutil==6.1.1 # via # accelerate # peft # tensorizer py==1.11.0 # via pytest-forked -pyarrow==18.0.0 +pyarrow==18.1.0 # via datasets pyasn1==0.6.1 # via rsa @@ -363,17 +365,17 @@ pybind11==2.13.6 # via lm-eval pycparser==2.22 # via cffi -pydantic[email]==2.9.2 +pydantic[email]==2.10.4 # via # datamodel-code-generator # mistral-common -pydantic-core==2.23.4 +pydantic-core==2.27.2 # via pydantic pyparsing==3.2.0 # via matplotlib pytablewriter==1.2.0 # via lm-eval -pytest==8.3.3 +pytest==8.3.4 # via # -r requirements-test.in # buildkite-test-collector @@ -381,11 +383,11 @@ pytest==8.3.3 # pytest-forked # pytest-rerunfailures # pytest-shard -pytest-asyncio==0.24.0 +pytest-asyncio==0.25.0 # via -r requirements-test.in pytest-forked==1.6.0 # via -r requirements-test.in -pytest-rerunfailures==14.0 +pytest-rerunfailures==15.0 # via -r requirements-test.in pytest-shard==0.1.2 # via -r requirements-test.in @@ -412,13 +414,13 @@ pyyaml==6.0.2 # transformers ray[adag]==2.40.0 # via -r requirements-test.in -redis==5.2.0 +redis==5.2.1 # via tensorizer referencing==0.35.1 # via # jsonschema # jsonschema-specifications -regex==2024.9.11 +regex==2024.11.6 # via # nltk # sacrebleu @@ -438,13 +440,13 @@ requests==2.32.3 # transformers rouge-score==0.1.2 # via lm-eval -rpds-py==0.20.1 +rpds-py==0.22.3 # via # jsonschema # referencing rsa==4.7.2 # via awscli -s3transfer==0.10.3 +s3transfer==0.10.4 # via # awscli # boto3 @@ -456,28 +458,26 @@ safetensors==0.4.5 # peft # timm # transformers -scikit-learn==1.5.2 +scikit-learn==1.6.0 # via # librosa # lm-eval # sentence-transformers -scipy==1.13.1 +scipy==1.14.1 # via # librosa # scikit-learn # sentence-transformers -sentence-transformers==3.2.1 +sentence-transformers==3.3.1 # via -r requirements-test.in sentencepiece==0.2.0 # via mistral-common -six==1.16.0 +six==1.17.0 # via # python-dateutil # rouge-score sniffio==1.3.1 - # via - # anyio - # httpx + # via anyio soundfile==0.12.1 # via # -r requirements-test.in @@ -496,7 +496,7 @@ tcolorpy==0.1.6 # via pytablewriter tenacity==9.0.0 # via lm-eval -tensorizer==2.9.0 +tensorizer==2.9.1 # via -r requirements-test.in threadpoolctl==3.5.0 # via scikit-learn @@ -504,7 +504,7 @@ tiktoken==0.7.0 # via # lm-eval # mistral-common -timm==1.0.11 +timm==1.0.12 # via -r requirements-test.in tokenizers==0.21.0 # via transformers @@ -521,7 +521,7 @@ torch==2.5.1 # torchvision torchvision==0.20.1 # via timm -tqdm==4.66.6 +tqdm==4.67.1 # via # datasets # evaluate @@ -534,7 +534,7 @@ tqdm==4.66.6 # transformers tqdm-multiprocess==0.0.11 # via lm-eval -transformers==4.47.0 +transformers==4.47.1 # via # lm-eval # peft @@ -551,6 +551,8 @@ typepy[datetime]==1.3.2 # tabledata typing-extensions==4.12.2 # via + # anyio + # bitsandbytes # huggingface-hub # librosa # mistral-common @@ -559,7 +561,7 @@ typing-extensions==4.12.2 # torch tzdata==2024.2 # via pandas -urllib3==1.26.20 +urllib3==2.3.0 # via # botocore # requests @@ -569,7 +571,7 @@ xxhash==3.5.0 # via # datasets # evaluate -yarl==1.17.1 +yarl==1.18.3 # via aiohttp zstandard==0.23.0 # via lm-eval From b1eaf23826a49719e9f76166eab2fe0cbd8deb6f Mon Sep 17 00:00:00 2001 From: "rshaw@neuralmagic.com" Date: Thu, 9 Jan 2025 18:04:24 +0000 Subject: [PATCH 2/4] updated --- .buildkite/test-pipeline.yaml | 3 + requirements-test.txt | 126 +++++++++++++++++----------------- 2 files changed, 67 insertions(+), 62 deletions(-) diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml index b563c96343f9..bba17e8b152f 100644 --- a/.buildkite/test-pipeline.yaml +++ b/.buildkite/test-pipeline.yaml @@ -180,6 +180,9 @@ steps: - tests/v1 commands: - VLLM_USE_V1=1 pytest -v -s v1 + # Test streaming is working properly. Requires special branch. + - pip install -U git+https://github.com/robertgshaw2-neuralmagic/lm-evaluation-harness.git@streaming-api + - pytest -v -s entrypoints/openai/test_accuracy.py::test_lm_eval_accuracy_v1_engine - label: Examples Test # 25min working_dir: "/vllm-workspace/examples" diff --git a/requirements-test.txt b/requirements-test.txt index 6d563efffcd1..f576e42afcbb 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -2,32 +2,32 @@ # This file is autogenerated by pip-compile with Python 3.12 # by the following command: # -# pip-compile --output-file=requirements-test.txt requirements-test.in +# python3.12 -m piptools compile requirements-test.in -o requirements-test.txt # absl-py==2.1.0 # via rouge-score -accelerate==1.2.1 +accelerate==1.0.1 # via # lm-eval # peft -aiohappyeyeballs==2.4.4 +aiohappyeyeballs==2.4.3 # via aiohttp -aiohttp==3.11.11 +aiohttp==3.10.10 # via # datasets # fsspec # lm-eval -aiosignal==1.3.2 +aiosignal==1.3.1 # via # aiohttp # ray annotated-types==0.7.0 # via pydantic -anyio==4.7.0 +anyio==4.6.2.post1 # via httpx -argcomplete==3.5.2 +argcomplete==3.5.1 # via datamodel-code-generator -attrs==24.3.0 +attrs==24.2.0 # via # aiohttp # jsonlines @@ -35,22 +35,24 @@ attrs==24.3.0 # referencing audioread==3.0.1 # via librosa -awscli==1.36.31 +awscli==1.35.23 # via -r requirements-test.in -bitsandbytes==0.45.0 +bitsandbytes>=0.45.0 # via -r requirements-test.in black==24.10.0 # via datamodel-code-generator -boto3==1.35.90 +boto3==1.35.57 # via tensorizer -botocore==1.35.90 +botocore==1.35.57 # via # awscli # boto3 # s3transfer +bounded-pool-executor==0.0.3 + # via pqdm buildkite-test-collector==0.1.9 # via -r requirements-test.in -certifi==2024.12.14 +certifi==2024.8.30 # via # httpcore # httpx @@ -59,9 +61,9 @@ cffi==1.17.1 # via soundfile chardet==5.2.0 # via mbstrdecoder -charset-normalizer==3.4.1 +charset-normalizer==3.4.0 # via requests -click==8.1.8 +click==8.1.7 # via # black # nltk @@ -71,19 +73,19 @@ colorama==0.4.6 # awscli # sacrebleu # tqdm-multiprocess -contourpy==1.3.1 +contourpy==1.3.0 # via matplotlib cupy-cuda12x==13.3.0 # via ray cycler==0.12.1 # via matplotlib -datamodel-code-generator==0.26.4 +datamodel-code-generator==0.26.3 # via -r requirements-test.in dataproperty==1.0.1 # via # pytablewriter # tabledata -datasets==3.2.0 +datasets==3.0.2 # via # evaluate # lm-eval @@ -107,7 +109,7 @@ email-validator==2.2.0 # via pydantic evaluate==0.4.3 # via lm-eval -fastrlock==0.8.3 +fastrlock==0.8.2 # via cupy-cuda12x filelock==3.16.1 # via @@ -117,7 +119,7 @@ filelock==3.16.1 # torch # transformers # triton -fonttools==4.55.3 +fonttools==4.54.1 # via matplotlib frozenlist==1.5.0 # via @@ -134,13 +136,13 @@ genson==1.3.0 # via datamodel-code-generator h11==0.14.0 # via httpcore -hiredis==3.1.0 +hiredis==3.0.0 # via tensorizer -httpcore==1.0.7 +httpcore==1.0.6 # via httpx -httpx==0.28.1 +httpx==0.27.2 # via -r requirements-test.in -huggingface-hub==0.27.0 +huggingface-hub==0.26.2 # via # accelerate # datasets @@ -163,7 +165,7 @@ iniconfig==2.0.0 # via pytest isort==5.13.2 # via datamodel-code-generator -jinja2==3.1.5 +jinja2==3.1.4 # via # datamodel-code-generator # torch @@ -184,7 +186,7 @@ jsonschema==4.23.0 # ray jsonschema-specifications==2024.10.1 # via jsonschema -kiwisolver==1.4.8 +kiwisolver==1.4.7 # via matplotlib lazy-loader==0.4 # via librosa @@ -194,13 +196,13 @@ librosa==0.10.2.post1 # via -r requirements-test.in llvmlite==0.43.0 # via numba -lm-eval[api] @ git+https://github.com/robertgshaw2-neuralmagic/lm-evaluation-harness.git@streaming-api +lm-eval[api]==0.4.4 # via -r requirements-test.in lxml==5.3.0 # via sacrebleu markupsafe==3.0.2 # via jinja2 -matplotlib==3.10.0 +matplotlib==3.9.2 # via -r requirements-test.in mbstrdecoder==1.1.3 # via @@ -229,13 +231,13 @@ multiprocess==0.70.16 # evaluate mypy-extensions==1.0.0 # via black -networkx==3.4.2 +networkx==3.2.1 # via torch nltk==3.9.1 # via rouge-score numba==0.60.0 # via librosa -numexpr==2.10.2 +numexpr==2.10.1 # via lm-eval numpy==1.26.4 # via @@ -297,7 +299,7 @@ nvidia-nvtx-cu12==12.4.127 # via torch opencv-python-headless==4.10.0.84 # via mistral-common -packaging==24.2 +packaging==24.1 # via # accelerate # black @@ -322,7 +324,7 @@ pathspec==0.12.1 # via black pathvalidate==3.2.1 # via pytablewriter -peft==0.14.0 +peft==0.13.2 # via # -r requirements-test.in # lm-eval @@ -340,24 +342,24 @@ pluggy==1.5.0 # via pytest pooch==1.8.2 # via librosa -portalocker==3.0.0 +portalocker==2.10.1 # via sacrebleu -propcache==0.2.1 - # via - # aiohttp - # yarl -protobuf==5.29.2 +pqdm==0.2.0 + # via -r requirements-test.in +propcache==0.2.0 + # via yarl +protobuf==5.28.3 # via # ray # tensorizer -psutil==6.1.1 +psutil==6.1.0 # via # accelerate # peft # tensorizer py==1.11.0 # via pytest-forked -pyarrow==18.1.0 +pyarrow==18.0.0 # via datasets pyasn1==0.6.1 # via rsa @@ -365,17 +367,17 @@ pybind11==2.13.6 # via lm-eval pycparser==2.22 # via cffi -pydantic[email]==2.10.4 +pydantic[email]==2.9.2 # via # datamodel-code-generator # mistral-common -pydantic-core==2.27.2 +pydantic-core==2.23.4 # via pydantic pyparsing==3.2.0 # via matplotlib pytablewriter==1.2.0 # via lm-eval -pytest==8.3.4 +pytest==8.3.3 # via # -r requirements-test.in # buildkite-test-collector @@ -383,11 +385,11 @@ pytest==8.3.4 # pytest-forked # pytest-rerunfailures # pytest-shard -pytest-asyncio==0.25.0 +pytest-asyncio==0.24.0 # via -r requirements-test.in pytest-forked==1.6.0 # via -r requirements-test.in -pytest-rerunfailures==15.0 +pytest-rerunfailures==14.0 # via -r requirements-test.in pytest-shard==0.1.2 # via -r requirements-test.in @@ -414,13 +416,13 @@ pyyaml==6.0.2 # transformers ray[adag]==2.40.0 # via -r requirements-test.in -redis==5.2.1 +redis==5.2.0 # via tensorizer referencing==0.35.1 # via # jsonschema # jsonschema-specifications -regex==2024.11.6 +regex==2024.9.11 # via # nltk # sacrebleu @@ -440,13 +442,13 @@ requests==2.32.3 # transformers rouge-score==0.1.2 # via lm-eval -rpds-py==0.22.3 +rpds-py==0.20.1 # via # jsonschema # referencing rsa==4.7.2 # via awscli -s3transfer==0.10.4 +s3transfer==0.10.3 # via # awscli # boto3 @@ -458,26 +460,28 @@ safetensors==0.4.5 # peft # timm # transformers -scikit-learn==1.6.0 +scikit-learn==1.5.2 # via # librosa # lm-eval # sentence-transformers -scipy==1.14.1 +scipy==1.13.1 # via # librosa # scikit-learn # sentence-transformers -sentence-transformers==3.3.1 +sentence-transformers==3.2.1 # via -r requirements-test.in sentencepiece==0.2.0 # via mistral-common -six==1.17.0 +six==1.16.0 # via # python-dateutil # rouge-score sniffio==1.3.1 - # via anyio + # via + # anyio + # httpx soundfile==0.12.1 # via # -r requirements-test.in @@ -496,7 +500,7 @@ tcolorpy==0.1.6 # via pytablewriter tenacity==9.0.0 # via lm-eval -tensorizer==2.9.1 +tensorizer==2.9.0 # via -r requirements-test.in threadpoolctl==3.5.0 # via scikit-learn @@ -504,7 +508,7 @@ tiktoken==0.7.0 # via # lm-eval # mistral-common -timm==1.0.12 +timm==1.0.11 # via -r requirements-test.in tokenizers==0.21.0 # via transformers @@ -521,7 +525,7 @@ torch==2.5.1 # torchvision torchvision==0.20.1 # via timm -tqdm==4.67.1 +tqdm==4.66.6 # via # datasets # evaluate @@ -534,7 +538,7 @@ tqdm==4.67.1 # transformers tqdm-multiprocess==0.0.11 # via lm-eval -transformers==4.47.1 +transformers==4.47.0 # via # lm-eval # peft @@ -551,8 +555,6 @@ typepy[datetime]==1.3.2 # tabledata typing-extensions==4.12.2 # via - # anyio - # bitsandbytes # huggingface-hub # librosa # mistral-common @@ -561,7 +563,7 @@ typing-extensions==4.12.2 # torch tzdata==2024.2 # via pandas -urllib3==2.3.0 +urllib3==1.26.20 # via # botocore # requests @@ -571,7 +573,7 @@ xxhash==3.5.0 # via # datasets # evaluate -yarl==1.18.3 +yarl==1.17.1 # via aiohttp zstandard==0.23.0 # via lm-eval From 237fcc3880c5c2bbc96bcb72858e02ca74437c6c Mon Sep 17 00:00:00 2001 From: "rshaw@neuralmagic.com" Date: Thu, 9 Jan 2025 18:04:36 +0000 Subject: [PATCH 3/4] revert --- requirements-test.in | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements-test.in b/requirements-test.in index 91aa7f473da0..4b4dc376d1fa 100644 --- a/requirements-test.in +++ b/requirements-test.in @@ -13,6 +13,7 @@ einops # required for MPT, qwen-vl and Mamba httpx librosa # required for audio tests peft +pqdm ray[adag]==2.40.0 sentence-transformers # required for embedding tests soundfile # required for audio tests @@ -22,7 +23,7 @@ transformers_stream_generator # required for qwen-vl test matplotlib # required for qwen-vl test mistral_common[opencv] >= 1.5.0 # required for pixtral test datamodel_code_generator # required for minicpm3 test -lm-eval[api] @ git+https://github.com/robertgshaw2-neuralmagic/lm-evaluation-harness.git@streaming-api # required for model evaluation test with streaming +lm-eval[api]==0.4.4 # required for model evaluation test # quantization bitsandbytes>=0.45.0 From 6b9fecec01f9c717e2b20603cee98d01971de806 Mon Sep 17 00:00:00 2001 From: "rshaw@neuralmagic.com" Date: Thu, 9 Jan 2025 18:10:59 +0000 Subject: [PATCH 4/4] revert --- requirements-test.in | 1 - requirements-test.txt | 4 ---- 2 files changed, 5 deletions(-) diff --git a/requirements-test.in b/requirements-test.in index 4b4dc376d1fa..fb4179c3d842 100644 --- a/requirements-test.in +++ b/requirements-test.in @@ -13,7 +13,6 @@ einops # required for MPT, qwen-vl and Mamba httpx librosa # required for audio tests peft -pqdm ray[adag]==2.40.0 sentence-transformers # required for embedding tests soundfile # required for audio tests diff --git a/requirements-test.txt b/requirements-test.txt index f576e42afcbb..3771577fe8ed 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -48,8 +48,6 @@ botocore==1.35.57 # awscli # boto3 # s3transfer -bounded-pool-executor==0.0.3 - # via pqdm buildkite-test-collector==0.1.9 # via -r requirements-test.in certifi==2024.8.30 @@ -344,8 +342,6 @@ pooch==1.8.2 # via librosa portalocker==2.10.1 # via sacrebleu -pqdm==0.2.0 - # via -r requirements-test.in propcache==0.2.0 # via yarl protobuf==5.28.3