Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions docs/features/multimodal_inputs.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ This page teaches you how to pass multi-modal inputs to [multi-modal models][sup

!!! tip
When serving multi-modal models, consider setting `--allowed-media-domains` to restrict the domains that vLLM can access, preventing it from reaching arbitrary endpoints that may be vulnerable to Server-Side Request Forgery (SSRF) attacks. You can provide a list of domains for this argument. For example: `--allowed-media-domains upload.wikimedia.org github.com www.bogotobogo.com`

Also, consider setting `VLLM_MEDIA_URL_ALLOW_REDIRECTS=0` so that HTTP redirects are not followed; otherwise, a redirect could be used to bypass the domain restrictions.

This restriction is especially important if you run vLLM in a containerized environment where the vLLM pods may have unrestricted access to internal networks.

## Offline Inference
Expand Down
3 changes: 3 additions & 0 deletions docs/usage/security.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,9 @@ Restrict domains that vLLM can access for media URLs by setting
`--allowed-media-domains` to prevent Server-Side Request Forgery (SSRF) attacks.
(e.g. `--allowed-media-domains upload.wikimedia.org github.com www.bogotobogo.com`)

Also, consider setting `VLLM_MEDIA_URL_ALLOW_REDIRECTS=0` so that HTTP redirects
are not followed; otherwise, a redirect could be used to bypass the domain restrictions.

## Security and Firewalls: Protecting Exposed vLLM Systems

While vLLM is designed to allow unsafe network services to be isolated to
Expand Down
24 changes: 18 additions & 6 deletions vllm/connections.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ def get_response(
stream: bool = False,
timeout: Optional[float] = None,
extra_headers: Optional[Mapping[str, str]] = None,
allow_redirects: bool = True,
):
self._validate_http_url(url)

Expand All @@ -63,14 +64,16 @@ def get_response(
return client.get(url,
headers=self._headers(**extra_headers),
stream=stream,
timeout=timeout)
timeout=timeout,
allow_redirects=allow_redirects)

async def get_async_response(
self,
url: str,
*,
timeout: Optional[float] = None,
extra_headers: Optional[Mapping[str, str]] = None,
allow_redirects: bool = True,
):
self._validate_http_url(url)

Expand All @@ -79,10 +82,17 @@ async def get_async_response(

return client.get(url,
headers=self._headers(**extra_headers),
timeout=timeout)

def get_bytes(self, url: str, *, timeout: Optional[float] = None) -> bytes:
with self.get_response(url, timeout=timeout) as r:
timeout=timeout,
allow_redirects=allow_redirects)

def get_bytes(self,
url: str,
*,
timeout: Optional[float] = None,
allow_redirects: bool = True) -> bytes:
with self.get_response(url,
timeout=timeout,
allow_redirects=allow_redirects) as r:
r.raise_for_status()

return r.content
Expand All @@ -92,8 +102,10 @@ async def async_get_bytes(
url: str,
*,
timeout: Optional[float] = None,
allow_redirects: bool = True,
) -> bytes:
async with await self.get_async_response(url, timeout=timeout) as r:
async with await self.get_async_response(
url, timeout=timeout, allow_redirects=allow_redirects) as r:
r.raise_for_status()

return await r.read()
Expand Down
6 changes: 6 additions & 0 deletions vllm/envs.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@
VLLM_IMAGE_FETCH_TIMEOUT: int = 5
VLLM_VIDEO_FETCH_TIMEOUT: int = 30
VLLM_AUDIO_FETCH_TIMEOUT: int = 10
VLLM_MEDIA_URL_ALLOW_REDIRECTS: bool = True
VLLM_MEDIA_LOADING_THREAD_COUNT: int = 8
VLLM_MAX_AUDIO_CLIP_FILESIZE_MB: int = 25
VLLM_VIDEO_LOADER_BACKEND: str = "opencv"
Expand Down Expand Up @@ -733,6 +734,11 @@ def get_vllm_port() -> Optional[int]:
"VLLM_AUDIO_FETCH_TIMEOUT":
lambda: int(os.getenv("VLLM_AUDIO_FETCH_TIMEOUT", "10")),

# Whether to follow HTTP redirects when fetching from media URLs.
# Defaults to 1 (redirects are followed); set to 0 to disable them.
"VLLM_MEDIA_URL_ALLOW_REDIRECTS":
lambda: bool(int(os.getenv("VLLM_MEDIA_URL_ALLOW_REDIRECTS", "1"))),

# Max number of workers for the thread pool handling
# media bytes loading. Set to 1 to disable parallel processing.
# Default is 8
Expand Down
12 changes: 10 additions & 2 deletions vllm/multimodal/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,11 @@ def load_from_url(
self._assert_url_in_allowed_media_domains(url_spec)

connection = self.connection
data = connection.get_bytes(url, timeout=fetch_timeout)
data = connection.get_bytes(
url,
timeout=fetch_timeout,
allow_redirects=envs.VLLM_MEDIA_URL_ALLOW_REDIRECTS,
)

return media_io.load_bytes(data)

Expand All @@ -167,7 +171,11 @@ async def load_from_url_async(
self._assert_url_in_allowed_media_domains(url_spec)

connection = self.connection
data = await connection.async_get_bytes(url, timeout=fetch_timeout)
data = await connection.async_get_bytes(
url,
timeout=fetch_timeout,
allow_redirects=envs.VLLM_MEDIA_URL_ALLOW_REDIRECTS,
)
future = loop.run_in_executor(global_thread_pool,
media_io.load_bytes, data)
return await future
Expand Down