Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -620,15 +620,15 @@ RUN set -eux; \
ARG BITSANDBYTES_VERSION_X86=0.46.1
ARG BITSANDBYTES_VERSION_ARM64=0.42.0
ARG TIMM_VERSION=">=1.0.17"
ARG RUNAI_MODEL_STREAMER_VERSION=">=0.15.3"
ARG RUNAI_MODEL_STREAMER_VERSION=">=0.15.7"
RUN --mount=type=cache,target=/root/.cache/uv \
if [ "$TARGETPLATFORM" = "linux/arm64" ]; then \
BITSANDBYTES_VERSION="${BITSANDBYTES_VERSION_ARM64}"; \
else \
BITSANDBYTES_VERSION="${BITSANDBYTES_VERSION_X86}"; \
fi; \
uv pip install --system accelerate hf_transfer modelscope \
"bitsandbytes>=${BITSANDBYTES_VERSION}" "timm${TIMM_VERSION}" "runai-model-streamer[s3,gcs]${RUNAI_MODEL_STREAMER_VERSION}"
"bitsandbytes>=${BITSANDBYTES_VERSION}" "timm${TIMM_VERSION}" "runai-model-streamer[s3,gcs,azure]${RUNAI_MODEL_STREAMER_VERSION}"

# ============================================================
# VLLM INSTALLATION (depends on build stage)
Expand Down
2 changes: 1 addition & 1 deletion docker/versions.json
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,7 @@
"default": ">=1.0.17"
},
"RUNAI_MODEL_STREAMER_VERSION": {
"default": ">=0.15.3"
"default": ">=0.15.7"
}
}
}
10 changes: 10 additions & 0 deletions docs/models/extensions/runai_model_streamer.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,16 @@ vllm serve gs://core-llm/Llama-3-8b \
--load-format runai_streamer
```

To run a model from Azure Blob Storage, run:

```bash
AZURE_STORAGE_ACCOUNT_NAME=<account> \
vllm serve az://<container>/<model-path> \
--load-format runai_streamer
```

Authentication uses `DefaultAzureCredential`, which supports `az login`, managed identity, environment variables (`AZURE_CLIENT_ID`, `AZURE_TENANT_ID`, `AZURE_CLIENT_SECRET`), and other methods.

To run a model from an S3-compatible object store, run:

```bash
Expand Down
2 changes: 1 addition & 1 deletion requirements/nightly_torch_test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ tritonclient>=2.51.0

numba == 0.61.2 # Required for N-gram speculative decoding
numpy
runai-model-streamer[s3,gcs]==0.15.3
runai-model-streamer[s3,gcs,azure]==0.15.7
fastsafetensors>=0.2.2
instanttensor>=0.1.5
pydantic>=2.12 # 2.11 leads to error on python 3.13
2 changes: 1 addition & 1 deletion requirements/rocm.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ tensorizer==2.10.1
packaging>=24.2
setuptools>=77.0.3,<80.0.0
setuptools-scm>=8
runai-model-streamer[s3,gcs]==0.15.3
runai-model-streamer[s3,gcs,azure]==0.15.7
conch-triton-kernels==1.2.1
timm>=1.0.17
# amd-quark: required for Quark quantization on ROCm
Expand Down
2 changes: 1 addition & 1 deletion requirements/test.in
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ grpcio-reflection==1.78.0
arctic-inference == 0.1.1 # Required for suffix decoding test
numba == 0.61.2 # Required for N-gram speculative decoding
numpy
runai-model-streamer[s3,gcs]==0.15.3
runai-model-streamer[s3,gcs,azure]==0.15.7
fastsafetensors>=0.2.2 # 0.2.2 contains important fixes for multi-GPU mem usage
instanttensor>=0.1.5
pydantic>=2.12 # 2.11 leads to error on python 3.13
Expand Down
43 changes: 38 additions & 5 deletions requirements/test.txt
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,14 @@ audioread==3.0.1
# via librosa
av==16.1.0
# via -r requirements/test.in
azure-core==1.38.2
# via
# azure-identity
# azure-storage-blob
azure-identity==1.25.2
# via runai-model-streamer-azure
azure-storage-blob==12.28.0
# via runai-model-streamer-azure
backoff==2.2.1
# via
# -r requirements/test.in
Expand Down Expand Up @@ -103,8 +111,10 @@ certifi==2024.8.30
# rasterio
# requests
# sentry-sdk
cffi==1.17.1
# via soundfile
cffi==2.0.0
# via
# cryptography
# soundfile
chardet==5.2.0
# via mbstrdecoder
charset-normalizer==3.4.0
Expand Down Expand Up @@ -148,6 +158,12 @@ coverage==7.10.6
# via pytest-cov
cramjam==2.9.0
# via fastparquet
cryptography==46.0.5
# via
# azure-identity
# azure-storage-blob
# msal
# pyjwt
cuda-bindings==12.9.4
# via torch
cuda-pathfinder==1.3.3
Expand Down Expand Up @@ -379,6 +395,8 @@ iniconfig==2.0.0
# via pytest
instanttensor==0.1.5
# via -r requirements/test.in
isodate==0.7.2
# via azure-storage-blob
isoduration==20.11.0
# via jsonschema
isort==5.13.2
Expand Down Expand Up @@ -492,6 +510,12 @@ more-itertools==10.5.0
# via lm-eval
mpmath==1.3.0
# via sympy
msal==1.34.0
# via
# azure-identity
# msal-extensions
msal-extensions==1.3.1
# via azure-identity
msgpack==1.1.0
# via
# librosa
Expand Down Expand Up @@ -828,6 +852,8 @@ pydantic-extra-types==2.10.5
# via mistral-common
pygments==2.18.0
# via rich
pyjwt==2.11.0
# via msal
pyogrio==0.11.0
# via geopandas
pyparsing==3.2.0
Expand Down Expand Up @@ -945,6 +971,7 @@ regex==2024.9.11
# transformers
requests==2.32.3
# via
# azure-core
# buildkite-test-collector
# datasets
# diffusers
Expand All @@ -957,6 +984,7 @@ requests==2.32.3
# lightly
# lm-eval
# mistral-common
# msal
# mteb
# pooch
# ray
Expand Down Expand Up @@ -993,11 +1021,13 @@ rsa==4.9.1
# via google-auth
rtree==1.4.0
# via torchgeo
runai-model-streamer==0.15.3
runai-model-streamer==0.15.7
# via -r requirements/test.in
runai-model-streamer-gcs==0.15.3
runai-model-streamer-azure==0.15.7
# via runai-model-streamer
runai-model-streamer-gcs==0.15.7
# via runai-model-streamer
runai-model-streamer-s3==0.15.3
runai-model-streamer-s3==0.15.7
# via runai-model-streamer
s3transfer==0.10.3
# via boto3
Expand Down Expand Up @@ -1266,6 +1296,9 @@ typing-extensions==4.15.0
# aiosignal
# albumentations
# alembic
# azure-core
# azure-identity
# azure-storage-blob
# chz
# fastapi
# grpcio
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -970,7 +970,7 @@ def _read_requirements(filename: str) -> list[str]:
"tensorizer": ["tensorizer==2.10.1"],
"fastsafetensors": ["fastsafetensors >= 0.2.2"],
"instanttensor": ["instanttensor >= 0.1.5"],
"runai": ["runai-model-streamer[s3,gcs] >= 0.15.3"],
"runai": ["runai-model-streamer[s3,gcs,azure] >= 0.15.7"],
"audio": [
"librosa",
"scipy",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
def test_is_runai_obj_uri():
    """gs://, s3:// and az:// URIs are accepted; an nfs:// URI is rejected."""
    accepted_uris = (
        "gs://some-gcs-bucket/path",
        "s3://some-s3-bucket/path",
        "az://some-azure-container/path",
    )
    for uri in accepted_uris:
        assert is_runai_obj_uri(uri)
    assert not is_runai_obj_uri("nfs://some-nfs-path")


Expand Down
9 changes: 9 additions & 0 deletions tests/transformers_utils/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
split_remote_gguf,
)
from vllm.transformers_utils.utils import (
is_azure,
is_cloud_storage,
is_gcs,
is_s3,
Expand All @@ -31,9 +32,17 @@ def test_is_s3():
assert not is_s3("nfs://nfs-fqdn.local")


def test_is_azure():
    """Only an az:// URI is classified as Azure; S3, local and NFS paths are not."""
    assert is_azure("az://model-container/path")
    non_azure_paths = (
        "s3://model-path/path-to-model",
        "/unix/local/path",
        "nfs://nfs-fqdn.local",
    )
    for candidate in non_azure_paths:
        assert not is_azure(candidate)


def test_is_cloud_storage():
    """gs://, s3:// and az:// URIs are cloud storage; local and NFS paths are not."""
    cloud_uris = (
        "gs://model-path",
        "s3://model-path/path-to-model",
        "az://model-container/path",
    )
    other_paths = (
        "/unix/local/path",
        "nfs://nfs-fqdn.local",
    )
    for uri in cloud_uris:
        assert is_cloud_storage(uri)
    for path in other_paths:
        assert not is_cloud_storage(path)

Expand Down
5 changes: 3 additions & 2 deletions vllm/config/vllm.py
Original file line number Diff line number Diff line change
Expand Up @@ -1574,8 +1574,9 @@ def try_verify_and_update_config(self):
"runai_streamer_sharded",
):
raise ValueError(
f"To load a model from S3, 'load_format' "
f"must be 'runai_streamer' or 'runai_streamer_sharded', "
f"To load a model from object storage (S3/GCS/Azure), "
f"'load_format' must be 'runai_streamer' or "
f"'runai_streamer_sharded', "
f"but got '{self.load_config.load_format}'. "
f"Model: {self.model_config.model}"
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
class RunaiModelStreamerLoader(BaseModelLoader):
"""
Model loader that can load safetensors
files from local FS or S3 bucket.
files from local FS, S3, GCS, or Azure Blob Storage.
"""

def __init__(self, load_config: LoadConfig):
Expand Down
2 changes: 1 addition & 1 deletion vllm/transformers_utils/runai_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@

logger = init_logger(__name__)

# URI scheme prefixes listed as supported: s3://, gs:// and az://.
# The earlier assignment without "az://" was immediately overwritten, so it
# is dropped and only the effective value is kept.
SUPPORTED_SCHEMES = ["s3://", "gs://", "az://"]

try:
from runai_model_streamer import list_safetensors as runai_list_safetensors
Expand Down
6 changes: 5 additions & 1 deletion vllm/transformers_utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,12 @@ def is_gcs(model_or_path: str) -> bool:
return model_or_path.lower().startswith("gs://")


def is_azure(model_or_path: str) -> bool:
    """Return True if *model_or_path* starts with ``az://`` (case-insensitive)."""
    normalized = model_or_path.lower()
    return normalized.startswith("az://")


def is_cloud_storage(model_or_path: str) -> bool:
    """Return True if *model_or_path* is accepted by ``is_s3``, ``is_gcs`` or ``is_azure``.

    A stale ``return`` that checked only ``is_s3`` and ``is_gcs`` used to come
    first, which made the Azure check unreachable; it has been removed so
    ``az://`` paths are recognized as cloud storage.
    """
    return is_s3(model_or_path) or is_gcs(model_or_path) or is_azure(model_or_path)


def without_trust_remote_code(kwargs: dict[str, Any]) -> dict[str, Any]:
Expand Down
Loading