vllm-project · simon-mo · Oct 2, 2025 · Oct 1, 2025 · Oct 1, 2025 · Oct 2, 2025
diff --git a/docs/features/multimodal_inputs.md b/docs/features/multimodal_inputs.md
@@ -8,6 +8,9 @@ This page teaches you how to pass multi-modal inputs to [multi-modal models][sup
 
 !!! tip
     When serving multi-modal models, consider setting `--allowed-media-domains` to restrict domain that vLLM can access to prevent it from accessing arbitrary endpoints that can potentially be vulnerable to Server-Side Request Forgery (SSRF) attacks. You can provide a list of domains for this arg. For example: `--allowed-media-domains upload.wikimedia.org github.com www.bogotobogo.com`
+
+    Also, consider setting `VLLM_MEDIA_URL_ALLOW_REDIRECTS=0` so that HTTP redirects are not followed; otherwise, a redirect from an allowed domain could be used to bypass the domain restrictions.
+
     This restriction is especially important if you run vLLM in a containerized environment where the vLLM pods may have unrestricted access to internal networks.
 
 ## Offline Inference

@@ -66,6 +66,9 @@ Restrict domains that vLLM can access for media URLs by setting
 `--allowed-media-domains` to prevent Server-Side Request Forgery (SSRF) attacks.
 (e.g. `--allowed-media-domains upload.wikimedia.org github.com www.bogotobogo.com`)
 
+Also, consider setting `VLLM_MEDIA_URL_ALLOW_REDIRECTS=0` so that HTTP redirects
+are not followed; otherwise, a redirect could bypass the domain restrictions.
+
 ## Security and Firewalls: Protecting Exposed vLLM Systems
 
 While vLLM is designed to allow unsafe network services to be isolated to

diff --git a/vllm/connections.py b/vllm/connections.py
@@ -54,6 +54,7 @@ def get_response(
         stream: bool = False,
         timeout: Optional[float] = None,
         extra_headers: Optional[Mapping[str, str]] = None,
+        allow_redirects: bool = True,
     ):
         self._validate_http_url(url)
 
@@ -63,14 +64,16 @@ def get_response(
         return client.get(url,
                           headers=self._headers(**extra_headers),
                           stream=stream,
-                          timeout=timeout)
+                          timeout=timeout,
+                          allow_redirects=allow_redirects)
 
     async def get_async_response(
         self,
         url: str,
         *,
         timeout: Optional[float] = None,
         extra_headers: Optional[Mapping[str, str]] = None,
+        allow_redirects: bool = True,
     ):
         self._validate_http_url(url)
 
@@ -79,10 +82,17 @@ async def get_async_response(
 
         return client.get(url,
                           headers=self._headers(**extra_headers),
-                          timeout=timeout)
-
-    def get_bytes(self, url: str, *, timeout: Optional[float] = None) -> bytes:
-        with self.get_response(url, timeout=timeout) as r:
+                          timeout=timeout,
+                          allow_redirects=allow_redirects)
+
+    def get_bytes(self,
+                  url: str,
+                  *,
+                  timeout: Optional[float] = None,
+                  allow_redirects: bool = True) -> bytes:
+        with self.get_response(url,
+                               timeout=timeout,
+                               allow_redirects=allow_redirects) as r:
             r.raise_for_status()
 
             return r.content
@@ -92,8 +102,10 @@ async def async_get_bytes(
         url: str,
         *,
         timeout: Optional[float] = None,
+        allow_redirects: bool = True,
     ) -> bytes:
-        async with await self.get_async_response(url, timeout=timeout) as r:
+        async with await self.get_async_response(
+                url, timeout=timeout, allow_redirects=allow_redirects) as r:
             r.raise_for_status()
 
             return await r.read()

diff --git a/vllm/envs.py b/vllm/envs.py
@@ -68,6 +68,7 @@
     VLLM_IMAGE_FETCH_TIMEOUT: int = 5
     VLLM_VIDEO_FETCH_TIMEOUT: int = 30
     VLLM_AUDIO_FETCH_TIMEOUT: int = 10
+    VLLM_MEDIA_URL_ALLOW_REDIRECTS: bool = True
     VLLM_MEDIA_LOADING_THREAD_COUNT: int = 8
     VLLM_MAX_AUDIO_CLIP_FILESIZE_MB: int = 25
     VLLM_VIDEO_LOADER_BACKEND: str = "opencv"
@@ -733,6 +734,11 @@ def get_vllm_port() -> Optional[int]:
     "VLLM_AUDIO_FETCH_TIMEOUT":
     lambda: int(os.getenv("VLLM_AUDIO_FETCH_TIMEOUT", "10")),
 
+    # Whether to allow HTTP redirects when fetching from media URLs.
+    # Defaults to True. The value is parsed as an integer: set to 0 to disable.
+    "VLLM_MEDIA_URL_ALLOW_REDIRECTS":
+    lambda: bool(int(os.getenv("VLLM_MEDIA_URL_ALLOW_REDIRECTS", "1"))),
+
     # Max number of workers for the thread pool handling
     # media bytes loading. Set to 1 to disable parallel processing.
     # Default is 8

@@ -140,7 +140,11 @@ def load_from_url(
             self._assert_url_in_allowed_media_domains(url_spec)
 
             connection = self.connection
-            data = connection.get_bytes(url, timeout=fetch_timeout)
+            data = connection.get_bytes(
+                url,
+                timeout=fetch_timeout,
+                allow_redirects=envs.VLLM_MEDIA_URL_ALLOW_REDIRECTS,
+            )
 
             return media_io.load_bytes(data)
 
@@ -167,7 +171,11 @@ async def load_from_url_async(
             self._assert_url_in_allowed_media_domains(url_spec)
 
             connection = self.connection
-            data = await connection.async_get_bytes(url, timeout=fetch_timeout)
+            data = await connection.async_get_bytes(
+                url,
+                timeout=fetch_timeout,
+                allow_redirects=envs.VLLM_MEDIA_URL_ALLOW_REDIRECTS,
+            )
             future = loop.run_in_executor(global_thread_pool,
                                           media_io.load_bytes, data)
             return await future