enable vLLM upload with CUDA 12.8 build (#465)

ebsmothers · web-flow · commit d464193e1f61 · 2025-10-20T09:23:26.000-07:00
diff --git a/.github/packaging/vllm_reqs.txt b/.github/packaging/vllm_reqs.txt
@@ -7,20 +7,20 @@
 # See the file .github/workflows/gpu_test.yaml for an E2E forge installation using this approach.
 # TODO: this should be done way less hackily
 aiohappyeyeballs==2.6.1
-aiohttp==3.13.0
+aiohttp==3.13.1
 aiosignal==1.4.0
 annotated-types==0.7.0
 anyio==4.11.0
 astor==0.8.1
 async-timeout==5.0.1
 attrs==25.4.0
-blake3==1.0.7
-cachetools==6.2.0
+blake3==1.0.8
+cachetools==6.2.1
 cbor2==5.7.0
 certifi==2025.10.5
 cffi==2.0.0
-charset-normalizer==3.4.3
-click==8.3.0
+charset-normalizer==3.4.4
+click==8.2.1
 cloudpickle==3.1.1
 cmake==4.1.0
 compressed-tensors==0.10.2
@@ -33,7 +33,7 @@ dnspython==2.8.0
 einops==0.8.1
 email-validator==2.3.0
 exceptiongroup==1.3.0
-fastapi==0.118.3
+fastapi==0.119.0
 fastapi-cli==0.0.13
 fastapi-cloud-cli==0.3.1
 fastrlock==0.8.3
@@ -47,81 +47,80 @@ httpcore==1.0.9
 httptools==0.7.1
 httpx==0.28.1
 huggingface-hub==0.35.3
-idna==3.10
+idna==3.11
 interegular==0.3.3
 Jinja2==3.1.6
-jiter==0.11.0
+jiter==0.11.1
 jsonschema==4.25.1
 jsonschema-specifications==2025.9.1
 lark==1.2.2
 llguidance==0.7.30
 llvmlite==0.44.0
 lm-format-enforcer==0.10.12
 markdown-it-py==4.0.0
-MarkupSafe==3.0.2
+MarkupSafe==2.1.5
 mdurl==0.1.2
 mistral_common==1.8.5
 mpmath==1.3.0
 msgpack==1.1.2
 msgspec==0.19.0
 multidict==6.7.0
-networkx==3.4.2
+networkx==3.3
 ninja==1.13.0
 numba==0.61.2
 numpy==2.2.6
-nvidia-cublas-cu12==12.9.1.4
-nvidia-cuda-cupti-cu12==12.9.79
-nvidia-cuda-nvrtc-cu12==12.9.86
-nvidia-cuda-runtime-cu12==12.9.79
+nvidia-cublas-cu12==12.8.4.1
+nvidia-cuda-cupti-cu12==12.8.90
+nvidia-cuda-nvrtc-cu12==12.8.93
+nvidia-cuda-runtime-cu12==12.8.90
 nvidia-cudnn-cu12==9.10.2.21
-nvidia-cufft-cu12==11.4.1.4
-nvidia-cufile-cu12==1.14.1.1
-nvidia-curand-cu12==10.3.10.19
-nvidia-cusolver-cu12==11.7.5.82
-nvidia-cusparse-cu12==12.5.10.65
+nvidia-cufft-cu12==11.3.3.83
+nvidia-cufile-cu12==1.13.1.3
+nvidia-curand-cu12==10.3.9.90
+nvidia-cusolver-cu12==11.7.3.90
+nvidia-cusparse-cu12==12.5.8.93
 nvidia-cusparselt-cu12==0.7.1
 nvidia-nccl-cu12==2.27.5
-nvidia-nvjitlink-cu12==12.9.86
+nvidia-nvjitlink-cu12==12.8.93
 nvidia-nvshmem-cu12==3.3.20
-nvidia-nvtx-cu12==12.9.79
+nvidia-nvtx-cu12==12.8.90
 openai==1.90.0
 opencv-python-headless==4.12.0.88
 outlines_core==0.2.10
 packaging==25.0
 partial-json-parser==0.2.1.1.post6
-pillow==11.3.0
+pillow==12.0.0
 prometheus-fastapi-instrumentator==7.1.0
 prometheus_client==0.23.1
 propcache==0.4.1
-protobuf==6.32.1
+protobuf==6.33.0
 psutil==7.1.0
 py-cpuinfo==9.0.0
 pybase64==1.4.2
 pycountry==24.6.1
 pycparser==2.23
-pydantic==2.12.0
+pydantic==2.12.3
 pydantic-extra-types==2.10.6
-pydantic_core==2.41.1
+pydantic_core==2.41.4
 Pygments==2.19.2
 python-dotenv==1.1.1
 python-json-logger==4.0.0
 python-multipart==0.0.20
-pytorch-triton==3.4.0+gitf7888497
 PyYAML==6.0.3
 pyzmq==27.1.0
-ray==2.49.2
-referencing==0.36.2
+ray==2.50.0
+referencing==0.37.0
 regex==2025.9.18
 requests==2.32.5
 rich==14.2.0
 rich-toolkit==0.15.1
-rignore==0.7.0
+rignore==0.7.1
 rpds-py==0.27.1
 safetensors==0.6.2
 scipy==1.15.3
 sentencepiece==0.2.1
-sentry-sdk==2.41.0
-setuptools-scm==9.2.0
+sentry-sdk==2.42.0
+setuptools-scm==9.2.1
 shellingham==1.5.4
 sniffio==1.3.1
 soundfile==0.13.1
@@ -131,17 +130,17 @@ sympy==1.14.0
 tiktoken==0.12.0
 tokenizers==0.22.1
 tomli==2.3.0
-torch==2.9.0.dev20250905+cu129
+torch==2.9.0+cu128
 tqdm==4.67.1
-transformers==4.57.0
-triton==3.4.0
+transformers==4.57.1
+triton==3.5.0
 typer==0.19.2
 typing-inspection==0.4.2
 typing_extensions==4.15.0
 urllib3==2.5.0
 uvicorn==0.37.0
-uvloop==0.21.0
-watchfiles==1.1.0
+uvloop==0.22.1
+watchfiles==1.1.1
 websockets==15.0.1
 xgrammar==0.1.21
 yarl==1.22.0
diff --git a/.github/workflows/build_vllm.yaml b/.github/workflows/build_vllm.yaml
@@ -12,15 +12,15 @@ permissions:
 
 jobs:
   build:
-    name: forge-cu129-nightly
-    uses: pytorch/test-infra/.github/workflows/build_wheels_linux.yml@main
+    name: forge-cu128-nightly
+    uses: pytorch/test-infra/.github/workflows/build_wheels_linux.yml@vllm-push
     strategy:
       fail-fast: false
     with:
       repository: meta-pytorch/forge
       ref: ""
       test-infra-repository: pytorch/test-infra
-      test-infra-ref: main
+      test-infra-ref: vllm-push
       run-smoke-test: false
       wheel-nightly-policy: gha_workflow_preview_build_wheels
       wheel-upload-path: whl/preview/forge/
@@ -31,13 +31,13 @@ jobs:
             {
               "python_version": "3.10",
               "gpu_arch_type": "cpu",
-              "gpu_arch_version": "12.9",
-              "desired_cuda": "cu129",
-              "container_image": "pytorch/manylinux2_28-builder:cuda12.9",
+              "gpu_arch_version": "12.8",
+              "desired_cuda": "cu128",
+              "container_image": "pytorch/manylinux2_28-builder:cuda12.8",
               "package_type": "manywheel",
-              "build_name": "manywheel-py3_10-cuda12_9",
+              "build_name": "manywheel-py3_10-cuda12_8",
               "validation_runner": "linux.12xlarge.memory",
-              "installation": "pip3 install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu129",
+              "installation": "pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu128",
               "channel": "nightly",
               "upload_to_base_bucket": "no",
               "stable_version": "2.8.0",