From 09d4be0211c86c24128f85bdca0ffeb6bfe8b0f1 Mon Sep 17 00:00:00 2001
From: "rshaw@neuralmagic.com" <rshaw@neuralmagic.com>
Date: Sat, 28 Dec 2024 21:51:33 +0000
Subject: [PATCH 1/4] Use lm-eval streaming-api fork; recompile test requirements

---
 requirements-test.in  |   2 +-
 requirements-test.txt | 122 +++++++++++++++++++++---------------------
 2 files changed, 63 insertions(+), 61 deletions(-)

diff --git a/requirements-test.in b/requirements-test.in
index fb4179c3d842..91aa7f473da0 100644
--- a/requirements-test.in
+++ b/requirements-test.in
@@ -22,7 +22,7 @@ transformers_stream_generator # required for qwen-vl test
 matplotlib # required for qwen-vl test
 mistral_common[opencv] >= 1.5.0 # required for pixtral test
 datamodel_code_generator # required for minicpm3 test
-lm-eval[api]==0.4.4 # required for model evaluation test
+lm-eval[api] @ git+https://github.com/robertgshaw2-neuralmagic/lm-evaluation-harness.git@streaming-api # required for model evaluation test with streaming
 
 # quantization
 bitsandbytes>=0.45.0
diff --git a/requirements-test.txt b/requirements-test.txt
index 3771577fe8ed..6d563efffcd1 100644
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -2,32 +2,32 @@
 # This file is autogenerated by pip-compile with Python 3.12
 # by the following command:
 #
-#    python3.12 -m piptools compile requirements-test.in -o requirements-test.txt
+#    pip-compile --output-file=requirements-test.txt requirements-test.in
 #
 absl-py==2.1.0
     # via rouge-score
-accelerate==1.0.1
+accelerate==1.2.1
     # via
     #   lm-eval
     #   peft
-aiohappyeyeballs==2.4.3
+aiohappyeyeballs==2.4.4
     # via aiohttp
-aiohttp==3.10.10
+aiohttp==3.11.11
     # via
     #   datasets
     #   fsspec
     #   lm-eval
-aiosignal==1.3.1
+aiosignal==1.3.2
     # via
     #   aiohttp
     #   ray
 annotated-types==0.7.0
     # via pydantic
-anyio==4.6.2.post1
+anyio==4.7.0
     # via httpx
-argcomplete==3.5.1
+argcomplete==3.5.2
     # via datamodel-code-generator
-attrs==24.2.0
+attrs==24.3.0
     # via
     #   aiohttp
     #   jsonlines
@@ -35,22 +35,22 @@ attrs==24.2.0
     #   referencing
 audioread==3.0.1
     # via librosa
-awscli==1.35.23
+awscli==1.36.31
     # via -r requirements-test.in
-bitsandbytes>=0.45.0
+bitsandbytes==0.45.0
     # via -r requirements-test.in
 black==24.10.0
     # via datamodel-code-generator
-boto3==1.35.57
+boto3==1.35.90
     # via tensorizer
-botocore==1.35.57
+botocore==1.35.90
     # via
     #   awscli
     #   boto3
     #   s3transfer
 buildkite-test-collector==0.1.9
     # via -r requirements-test.in
-certifi==2024.8.30
+certifi==2024.12.14
     # via
     #   httpcore
     #   httpx
@@ -59,9 +59,9 @@ cffi==1.17.1
     # via soundfile
 chardet==5.2.0
     # via mbstrdecoder
-charset-normalizer==3.4.0
+charset-normalizer==3.4.1
     # via requests
-click==8.1.7
+click==8.1.8
     # via
     #   black
     #   nltk
@@ -71,19 +71,19 @@ colorama==0.4.6
     #   awscli
     #   sacrebleu
     #   tqdm-multiprocess
-contourpy==1.3.0
+contourpy==1.3.1
     # via matplotlib
 cupy-cuda12x==13.3.0
     # via ray
 cycler==0.12.1
     # via matplotlib
-datamodel-code-generator==0.26.3
+datamodel-code-generator==0.26.4
     # via -r requirements-test.in
 dataproperty==1.0.1
     # via
     #   pytablewriter
     #   tabledata
-datasets==3.0.2
+datasets==3.2.0
     # via
     #   evaluate
     #   lm-eval
@@ -107,7 +107,7 @@ email-validator==2.2.0
     # via pydantic
 evaluate==0.4.3
     # via lm-eval
-fastrlock==0.8.2
+fastrlock==0.8.3
     # via cupy-cuda12x
 filelock==3.16.1
     # via
@@ -117,7 +117,7 @@ filelock==3.16.1
     #   torch
     #   transformers
     #   triton
-fonttools==4.54.1
+fonttools==4.55.3
     # via matplotlib
 frozenlist==1.5.0
     # via
@@ -134,13 +134,13 @@ genson==1.3.0
     # via datamodel-code-generator
 h11==0.14.0
     # via httpcore
-hiredis==3.0.0
+hiredis==3.1.0
     # via tensorizer
-httpcore==1.0.6
+httpcore==1.0.7
     # via httpx
-httpx==0.27.2
+httpx==0.28.1
     # via -r requirements-test.in
-huggingface-hub==0.26.2
+huggingface-hub==0.27.0
     # via
     #   accelerate
     #   datasets
@@ -163,7 +163,7 @@ iniconfig==2.0.0
     # via pytest
 isort==5.13.2
     # via datamodel-code-generator
-jinja2==3.1.4
+jinja2==3.1.5
     # via
     #   datamodel-code-generator
     #   torch
@@ -184,7 +184,7 @@ jsonschema==4.23.0
     #   ray
 jsonschema-specifications==2024.10.1
     # via jsonschema
-kiwisolver==1.4.7
+kiwisolver==1.4.8
     # via matplotlib
 lazy-loader==0.4
     # via librosa
@@ -194,13 +194,13 @@ librosa==0.10.2.post1
     # via -r requirements-test.in
 llvmlite==0.43.0
     # via numba
-lm-eval[api]==0.4.4
+lm-eval[api] @ git+https://github.com/robertgshaw2-neuralmagic/lm-evaluation-harness.git@streaming-api
     # via -r requirements-test.in
 lxml==5.3.0
     # via sacrebleu
 markupsafe==3.0.2
     # via jinja2
-matplotlib==3.9.2
+matplotlib==3.10.0
     # via -r requirements-test.in
 mbstrdecoder==1.1.3
     # via
@@ -229,13 +229,13 @@ multiprocess==0.70.16
     #   evaluate
 mypy-extensions==1.0.0
     # via black
-networkx==3.2.1
+networkx==3.4.2
     # via torch
 nltk==3.9.1
     # via rouge-score
 numba==0.60.0
     # via librosa
-numexpr==2.10.1
+numexpr==2.10.2
     # via lm-eval
 numpy==1.26.4
     # via
@@ -297,7 +297,7 @@ nvidia-nvtx-cu12==12.4.127
     # via torch
 opencv-python-headless==4.10.0.84
     # via mistral-common
-packaging==24.1
+packaging==24.2
     # via
     #   accelerate
     #   black
@@ -322,7 +322,7 @@ pathspec==0.12.1
     # via black
 pathvalidate==3.2.1
     # via pytablewriter
-peft==0.13.2
+peft==0.14.0
     # via
     #   -r requirements-test.in
     #   lm-eval
@@ -340,22 +340,24 @@ pluggy==1.5.0
     # via pytest
 pooch==1.8.2
     # via librosa
-portalocker==2.10.1
+portalocker==3.0.0
     # via sacrebleu
-propcache==0.2.0
-    # via yarl
-protobuf==5.28.3
+propcache==0.2.1
+    # via
+    #   aiohttp
+    #   yarl
+protobuf==5.29.2
     # via
     #   ray
     #   tensorizer
-psutil==6.1.0
+psutil==6.1.1
     # via
     #   accelerate
     #   peft
     #   tensorizer
 py==1.11.0
     # via pytest-forked
-pyarrow==18.0.0
+pyarrow==18.1.0
     # via datasets
 pyasn1==0.6.1
     # via rsa
@@ -363,17 +365,17 @@ pybind11==2.13.6
     # via lm-eval
 pycparser==2.22
     # via cffi
-pydantic[email]==2.9.2
+pydantic[email]==2.10.4
     # via
     #   datamodel-code-generator
     #   mistral-common
-pydantic-core==2.23.4
+pydantic-core==2.27.2
     # via pydantic
 pyparsing==3.2.0
     # via matplotlib
 pytablewriter==1.2.0
     # via lm-eval
-pytest==8.3.3
+pytest==8.3.4
     # via
     #   -r requirements-test.in
     #   buildkite-test-collector
@@ -381,11 +383,11 @@ pytest==8.3.3
     #   pytest-forked
     #   pytest-rerunfailures
     #   pytest-shard
-pytest-asyncio==0.24.0
+pytest-asyncio==0.25.0
     # via -r requirements-test.in
 pytest-forked==1.6.0
     # via -r requirements-test.in
-pytest-rerunfailures==14.0
+pytest-rerunfailures==15.0
     # via -r requirements-test.in
 pytest-shard==0.1.2
     # via -r requirements-test.in
@@ -412,13 +414,13 @@ pyyaml==6.0.2
     #   transformers
 ray[adag]==2.40.0
     # via -r requirements-test.in
-redis==5.2.0
+redis==5.2.1
     # via tensorizer
 referencing==0.35.1
     # via
     #   jsonschema
     #   jsonschema-specifications
-regex==2024.9.11
+regex==2024.11.6
     # via
     #   nltk
     #   sacrebleu
@@ -438,13 +440,13 @@ requests==2.32.3
     #   transformers
 rouge-score==0.1.2
     # via lm-eval
-rpds-py==0.20.1
+rpds-py==0.22.3
     # via
     #   jsonschema
     #   referencing
 rsa==4.7.2
     # via awscli
-s3transfer==0.10.3
+s3transfer==0.10.4
     # via
     #   awscli
     #   boto3
@@ -456,28 +458,26 @@ safetensors==0.4.5
     #   peft
     #   timm
     #   transformers
-scikit-learn==1.5.2
+scikit-learn==1.6.0
     # via
     #   librosa
     #   lm-eval
     #   sentence-transformers
-scipy==1.13.1
+scipy==1.14.1
     # via
     #   librosa
     #   scikit-learn
     #   sentence-transformers
-sentence-transformers==3.2.1
+sentence-transformers==3.3.1
     # via -r requirements-test.in
 sentencepiece==0.2.0
     # via mistral-common
-six==1.16.0
+six==1.17.0
     # via
     #   python-dateutil
     #   rouge-score
 sniffio==1.3.1
-    # via
-    #   anyio
-    #   httpx
+    # via anyio
 soundfile==0.12.1
     # via
     #   -r requirements-test.in
@@ -496,7 +496,7 @@ tcolorpy==0.1.6
     # via pytablewriter
 tenacity==9.0.0
     # via lm-eval
-tensorizer==2.9.0
+tensorizer==2.9.1
     # via -r requirements-test.in
 threadpoolctl==3.5.0
     # via scikit-learn
@@ -504,7 +504,7 @@ tiktoken==0.7.0
     # via
     #   lm-eval
     #   mistral-common
-timm==1.0.11
+timm==1.0.12
     # via -r requirements-test.in
 tokenizers==0.21.0
     # via transformers
@@ -521,7 +521,7 @@ torch==2.5.1
     #   torchvision
 torchvision==0.20.1
     # via timm
-tqdm==4.66.6
+tqdm==4.67.1
     # via
     #   datasets
     #   evaluate
@@ -534,7 +534,7 @@ tqdm==4.66.6
     #   transformers
 tqdm-multiprocess==0.0.11
     # via lm-eval
-transformers==4.47.0
+transformers==4.47.1
     # via
     #   lm-eval
     #   peft
@@ -551,6 +551,8 @@ typepy[datetime]==1.3.2
     #   tabledata
 typing-extensions==4.12.2
     # via
+    #   anyio
+    #   bitsandbytes
     #   huggingface-hub
     #   librosa
     #   mistral-common
@@ -559,7 +561,7 @@ typing-extensions==4.12.2
     #   torch
 tzdata==2024.2
     # via pandas
-urllib3==1.26.20
+urllib3==2.3.0
     # via
     #   botocore
     #   requests
@@ -569,7 +571,7 @@ xxhash==3.5.0
     # via
     #   datasets
     #   evaluate
-yarl==1.17.1
+yarl==1.18.3
     # via aiohttp
 zstandard==0.23.0
     # via lm-eval

From b1eaf23826a49719e9f76166eab2fe0cbd8deb6f Mon Sep 17 00:00:00 2001
From: "rshaw@neuralmagic.com" <rshaw@neuralmagic.com>
Date: Thu, 9 Jan 2025 18:04:24 +0000
Subject: [PATCH 2/4] Add V1 lm-eval streaming test to CI; restore prior test pins

---
 .buildkite/test-pipeline.yaml |   3 +
 requirements-test.txt         | 126 +++++++++++++++++-----------------
 2 files changed, 67 insertions(+), 62 deletions(-)

diff --git a/.buildkite/test-pipeline.yaml b/.buildkite/test-pipeline.yaml
index b563c96343f9..bba17e8b152f 100644
--- a/.buildkite/test-pipeline.yaml
+++ b/.buildkite/test-pipeline.yaml
@@ -180,6 +180,9 @@ steps:
     - tests/v1
   commands:
     - VLLM_USE_V1=1 pytest -v -s v1
+    # Check that streaming works via the lm-eval accuracy test. Requires the streaming-api branch of the lm-evaluation-harness fork.
+    - pip install -U git+https://github.com/robertgshaw2-neuralmagic/lm-evaluation-harness.git@streaming-api
+    - pytest -v -s entrypoints/openai/test_accuracy.py::test_lm_eval_accuracy_v1_engine
 
 - label: Examples Test # 25min
   working_dir: "/vllm-workspace/examples"
diff --git a/requirements-test.txt b/requirements-test.txt
index 6d563efffcd1..f576e42afcbb 100644
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -2,32 +2,32 @@
 # This file is autogenerated by pip-compile with Python 3.12
 # by the following command:
 #
-#    pip-compile --output-file=requirements-test.txt requirements-test.in
+#    python3.12 -m piptools compile requirements-test.in -o requirements-test.txt
 #
 absl-py==2.1.0
     # via rouge-score
-accelerate==1.2.1
+accelerate==1.0.1
     # via
     #   lm-eval
     #   peft
-aiohappyeyeballs==2.4.4
+aiohappyeyeballs==2.4.3
     # via aiohttp
-aiohttp==3.11.11
+aiohttp==3.10.10
     # via
     #   datasets
     #   fsspec
     #   lm-eval
-aiosignal==1.3.2
+aiosignal==1.3.1
     # via
     #   aiohttp
     #   ray
 annotated-types==0.7.0
     # via pydantic
-anyio==4.7.0
+anyio==4.6.2.post1
     # via httpx
-argcomplete==3.5.2
+argcomplete==3.5.1
     # via datamodel-code-generator
-attrs==24.3.0
+attrs==24.2.0
     # via
     #   aiohttp
     #   jsonlines
@@ -35,22 +35,24 @@ attrs==24.3.0
     #   referencing
 audioread==3.0.1
     # via librosa
-awscli==1.36.31
+awscli==1.35.23
     # via -r requirements-test.in
-bitsandbytes==0.45.0
+bitsandbytes>=0.45.0
     # via -r requirements-test.in
 black==24.10.0
     # via datamodel-code-generator
-boto3==1.35.90
+boto3==1.35.57
     # via tensorizer
-botocore==1.35.90
+botocore==1.35.57
     # via
     #   awscli
     #   boto3
     #   s3transfer
+bounded-pool-executor==0.0.3
+    # via pqdm
 buildkite-test-collector==0.1.9
     # via -r requirements-test.in
-certifi==2024.12.14
+certifi==2024.8.30
     # via
     #   httpcore
     #   httpx
@@ -59,9 +61,9 @@ cffi==1.17.1
     # via soundfile
 chardet==5.2.0
     # via mbstrdecoder
-charset-normalizer==3.4.1
+charset-normalizer==3.4.0
     # via requests
-click==8.1.8
+click==8.1.7
     # via
     #   black
     #   nltk
@@ -71,19 +73,19 @@ colorama==0.4.6
     #   awscli
     #   sacrebleu
     #   tqdm-multiprocess
-contourpy==1.3.1
+contourpy==1.3.0
     # via matplotlib
 cupy-cuda12x==13.3.0
     # via ray
 cycler==0.12.1
     # via matplotlib
-datamodel-code-generator==0.26.4
+datamodel-code-generator==0.26.3
     # via -r requirements-test.in
 dataproperty==1.0.1
     # via
     #   pytablewriter
     #   tabledata
-datasets==3.2.0
+datasets==3.0.2
     # via
     #   evaluate
     #   lm-eval
@@ -107,7 +109,7 @@ email-validator==2.2.0
     # via pydantic
 evaluate==0.4.3
     # via lm-eval
-fastrlock==0.8.3
+fastrlock==0.8.2
     # via cupy-cuda12x
 filelock==3.16.1
     # via
@@ -117,7 +119,7 @@ filelock==3.16.1
     #   torch
     #   transformers
     #   triton
-fonttools==4.55.3
+fonttools==4.54.1
     # via matplotlib
 frozenlist==1.5.0
     # via
@@ -134,13 +136,13 @@ genson==1.3.0
     # via datamodel-code-generator
 h11==0.14.0
     # via httpcore
-hiredis==3.1.0
+hiredis==3.0.0
     # via tensorizer
-httpcore==1.0.7
+httpcore==1.0.6
     # via httpx
-httpx==0.28.1
+httpx==0.27.2
     # via -r requirements-test.in
-huggingface-hub==0.27.0
+huggingface-hub==0.26.2
     # via
     #   accelerate
     #   datasets
@@ -163,7 +165,7 @@ iniconfig==2.0.0
     # via pytest
 isort==5.13.2
     # via datamodel-code-generator
-jinja2==3.1.5
+jinja2==3.1.4
     # via
     #   datamodel-code-generator
     #   torch
@@ -184,7 +186,7 @@ jsonschema==4.23.0
     #   ray
 jsonschema-specifications==2024.10.1
     # via jsonschema
-kiwisolver==1.4.8
+kiwisolver==1.4.7
     # via matplotlib
 lazy-loader==0.4
     # via librosa
@@ -194,13 +196,13 @@ librosa==0.10.2.post1
     # via -r requirements-test.in
 llvmlite==0.43.0
     # via numba
-lm-eval[api] @ git+https://github.com/robertgshaw2-neuralmagic/lm-evaluation-harness.git@streaming-api
+lm-eval[api]==0.4.4
     # via -r requirements-test.in
 lxml==5.3.0
     # via sacrebleu
 markupsafe==3.0.2
     # via jinja2
-matplotlib==3.10.0
+matplotlib==3.9.2
     # via -r requirements-test.in
 mbstrdecoder==1.1.3
     # via
@@ -229,13 +231,13 @@ multiprocess==0.70.16
     #   evaluate
 mypy-extensions==1.0.0
     # via black
-networkx==3.4.2
+networkx==3.2.1
     # via torch
 nltk==3.9.1
     # via rouge-score
 numba==0.60.0
     # via librosa
-numexpr==2.10.2
+numexpr==2.10.1
     # via lm-eval
 numpy==1.26.4
     # via
@@ -297,7 +299,7 @@ nvidia-nvtx-cu12==12.4.127
     # via torch
 opencv-python-headless==4.10.0.84
     # via mistral-common
-packaging==24.2
+packaging==24.1
     # via
     #   accelerate
     #   black
@@ -322,7 +324,7 @@ pathspec==0.12.1
     # via black
 pathvalidate==3.2.1
     # via pytablewriter
-peft==0.14.0
+peft==0.13.2
     # via
     #   -r requirements-test.in
     #   lm-eval
@@ -340,24 +342,24 @@ pluggy==1.5.0
     # via pytest
 pooch==1.8.2
     # via librosa
-portalocker==3.0.0
+portalocker==2.10.1
     # via sacrebleu
-propcache==0.2.1
-    # via
-    #   aiohttp
-    #   yarl
-protobuf==5.29.2
+pqdm==0.2.0
+    # via -r requirements-test.in
+propcache==0.2.0
+    # via yarl
+protobuf==5.28.3
     # via
     #   ray
     #   tensorizer
-psutil==6.1.1
+psutil==6.1.0
     # via
     #   accelerate
     #   peft
     #   tensorizer
 py==1.11.0
     # via pytest-forked
-pyarrow==18.1.0
+pyarrow==18.0.0
     # via datasets
 pyasn1==0.6.1
     # via rsa
@@ -365,17 +367,17 @@ pybind11==2.13.6
     # via lm-eval
 pycparser==2.22
     # via cffi
-pydantic[email]==2.10.4
+pydantic[email]==2.9.2
     # via
     #   datamodel-code-generator
     #   mistral-common
-pydantic-core==2.27.2
+pydantic-core==2.23.4
     # via pydantic
 pyparsing==3.2.0
     # via matplotlib
 pytablewriter==1.2.0
     # via lm-eval
-pytest==8.3.4
+pytest==8.3.3
     # via
     #   -r requirements-test.in
     #   buildkite-test-collector
@@ -383,11 +385,11 @@ pytest==8.3.4
     #   pytest-forked
     #   pytest-rerunfailures
     #   pytest-shard
-pytest-asyncio==0.25.0
+pytest-asyncio==0.24.0
     # via -r requirements-test.in
 pytest-forked==1.6.0
     # via -r requirements-test.in
-pytest-rerunfailures==15.0
+pytest-rerunfailures==14.0
     # via -r requirements-test.in
 pytest-shard==0.1.2
     # via -r requirements-test.in
@@ -414,13 +416,13 @@ pyyaml==6.0.2
     #   transformers
 ray[adag]==2.40.0
     # via -r requirements-test.in
-redis==5.2.1
+redis==5.2.0
     # via tensorizer
 referencing==0.35.1
     # via
     #   jsonschema
     #   jsonschema-specifications
-regex==2024.11.6
+regex==2024.9.11
     # via
     #   nltk
     #   sacrebleu
@@ -440,13 +442,13 @@ requests==2.32.3
     #   transformers
 rouge-score==0.1.2
     # via lm-eval
-rpds-py==0.22.3
+rpds-py==0.20.1
     # via
     #   jsonschema
     #   referencing
 rsa==4.7.2
     # via awscli
-s3transfer==0.10.4
+s3transfer==0.10.3
     # via
     #   awscli
     #   boto3
@@ -458,26 +460,28 @@ safetensors==0.4.5
     #   peft
     #   timm
     #   transformers
-scikit-learn==1.6.0
+scikit-learn==1.5.2
     # via
     #   librosa
     #   lm-eval
     #   sentence-transformers
-scipy==1.14.1
+scipy==1.13.1
     # via
     #   librosa
     #   scikit-learn
     #   sentence-transformers
-sentence-transformers==3.3.1
+sentence-transformers==3.2.1
     # via -r requirements-test.in
 sentencepiece==0.2.0
     # via mistral-common
-six==1.17.0
+six==1.16.0
     # via
     #   python-dateutil
     #   rouge-score
 sniffio==1.3.1
-    # via anyio
+    # via
+    #   anyio
+    #   httpx
 soundfile==0.12.1
     # via
     #   -r requirements-test.in
@@ -496,7 +500,7 @@ tcolorpy==0.1.6
     # via pytablewriter
 tenacity==9.0.0
     # via lm-eval
-tensorizer==2.9.1
+tensorizer==2.9.0
     # via -r requirements-test.in
 threadpoolctl==3.5.0
     # via scikit-learn
@@ -504,7 +508,7 @@ tiktoken==0.7.0
     # via
     #   lm-eval
     #   mistral-common
-timm==1.0.12
+timm==1.0.11
     # via -r requirements-test.in
 tokenizers==0.21.0
     # via transformers
@@ -521,7 +525,7 @@ torch==2.5.1
     #   torchvision
 torchvision==0.20.1
     # via timm
-tqdm==4.67.1
+tqdm==4.66.6
     # via
     #   datasets
     #   evaluate
@@ -534,7 +538,7 @@ tqdm==4.67.1
     #   transformers
 tqdm-multiprocess==0.0.11
     # via lm-eval
-transformers==4.47.1
+transformers==4.47.0
     # via
     #   lm-eval
     #   peft
@@ -551,8 +555,6 @@ typepy[datetime]==1.3.2
     #   tabledata
 typing-extensions==4.12.2
     # via
-    #   anyio
-    #   bitsandbytes
     #   huggingface-hub
     #   librosa
     #   mistral-common
@@ -561,7 +563,7 @@ typing-extensions==4.12.2
     #   torch
 tzdata==2024.2
     # via pandas
-urllib3==2.3.0
+urllib3==1.26.20
     # via
     #   botocore
     #   requests
@@ -571,7 +573,7 @@ xxhash==3.5.0
     # via
     #   datasets
     #   evaluate
-yarl==1.18.3
+yarl==1.17.1
     # via aiohttp
 zstandard==0.23.0
     # via lm-eval

From 237fcc3880c5c2bbc96bcb72858e02ca74437c6c Mon Sep 17 00:00:00 2001
From: "rshaw@neuralmagic.com" <rshaw@neuralmagic.com>
Date: Thu, 9 Jan 2025 18:04:36 +0000
Subject: [PATCH 3/4] Restore lm-eval[api]==0.4.4 pin; add pqdm to requirements-test.in

---
 requirements-test.in | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/requirements-test.in b/requirements-test.in
index 91aa7f473da0..4b4dc376d1fa 100644
--- a/requirements-test.in
+++ b/requirements-test.in
@@ -13,6 +13,7 @@ einops # required for MPT, qwen-vl and Mamba
 httpx
 librosa # required for audio tests
 peft
+pqdm
 ray[adag]==2.40.0
 sentence-transformers # required for embedding tests
 soundfile # required for audio tests
@@ -22,7 +23,7 @@ transformers_stream_generator # required for qwen-vl test
 matplotlib # required for qwen-vl test
 mistral_common[opencv] >= 1.5.0 # required for pixtral test
 datamodel_code_generator # required for minicpm3 test
-lm-eval[api] @ git+https://github.com/robertgshaw2-neuralmagic/lm-evaluation-harness.git@streaming-api # required for model evaluation test with streaming
+lm-eval[api]==0.4.4 # required for model evaluation test
 
 # quantization
 bitsandbytes>=0.45.0

From 6b9fecec01f9c717e2b20603cee98d01971de806 Mon Sep 17 00:00:00 2001
From: "rshaw@neuralmagic.com" <rshaw@neuralmagic.com>
Date: Thu, 9 Jan 2025 18:10:59 +0000
Subject: [PATCH 4/4] Remove pqdm from test requirements

---
 requirements-test.in  | 1 -
 requirements-test.txt | 4 ----
 2 files changed, 5 deletions(-)

diff --git a/requirements-test.in b/requirements-test.in
index 4b4dc376d1fa..fb4179c3d842 100644
--- a/requirements-test.in
+++ b/requirements-test.in
@@ -13,7 +13,6 @@ einops # required for MPT, qwen-vl and Mamba
 httpx
 librosa # required for audio tests
 peft
-pqdm
 ray[adag]==2.40.0
 sentence-transformers # required for embedding tests
 soundfile # required for audio tests
diff --git a/requirements-test.txt b/requirements-test.txt
index f576e42afcbb..3771577fe8ed 100644
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -48,8 +48,6 @@ botocore==1.35.57
     #   awscli
     #   boto3
     #   s3transfer
-bounded-pool-executor==0.0.3
-    # via pqdm
 buildkite-test-collector==0.1.9
     # via -r requirements-test.in
 certifi==2024.8.30
@@ -344,8 +342,6 @@ pooch==1.8.2
     # via librosa
 portalocker==2.10.1
     # via sacrebleu
-pqdm==0.2.0
-    # via -r requirements-test.in
 propcache==0.2.0
     # via yarl
 protobuf==5.28.3