From d50b2eea756f18c6e64b769c63cfd0a4e73af689 Mon Sep 17 00:00:00 2001
From: shin-bot-litellm
Date: Sat, 31 Jan 2026 17:59:14 +0000
Subject: [PATCH] litellm_fix(hosted_vllm): use custom client when provided

When a custom OpenAI client is passed to hosted_vllm completion, use
openai_chat_completions.completion() to ensure the client is properly
used. This fixes test failures in test_openai_compatible_custom_api_base
and test_openai_compatible_custom_api_video for the hosted_vllm
provider.

The base_llm_http_handler is still used when no custom client is
passed, preserving the ssl_verify functionality added in #19893.

Fixes: test_openai_compatible_custom_api_video[hosted_vllm]
Fixes: test_openai_compatible_custom_api_base[hosted_vllm]
---
 litellm/main.py | 63 ++++++++++++++++++++++++++++++++++---------------
 1 file changed, 44 insertions(+), 19 deletions(-)

diff --git a/litellm/main.py b/litellm/main.py
index 13361c644c..9bd4b42b80 100644
--- a/litellm/main.py
+++ b/litellm/main.py
@@ -2385,25 +2385,50 @@ def completion(  # type: ignore # noqa: PLR0915
             api_base or litellm.api_base or get_secret_str("HOSTED_VLLM_API_BASE")
         )
 
-        response = base_llm_http_handler.completion(
-            model=model,
-            messages=messages,
-            api_base=api_base,
-            custom_llm_provider=custom_llm_provider,
-            model_response=model_response,
-            encoding=_get_encoding(),
-            logging_obj=logging,
-            optional_params=optional_params,
-            timeout=timeout,
-            litellm_params=litellm_params,
-            shared_session=shared_session,
-            acompletion=acompletion,
-            stream=stream,
-            api_key=api_key,
-            headers=headers,
-            client=client,
-            provider_config=provider_config,
-        )
+        # If a custom OpenAI client is passed, use it via openai_chat_completions
+        # This ensures the client is properly used (e.g., for mocking in tests)
+        # Otherwise, use base_llm_http_handler for ssl_verify support
+        if client is not None:
+            response = openai_chat_completions.completion(
+                model=model,
+                messages=messages,
+                headers=headers,
+                model_response=model_response,
+                print_verbose=print_verbose,
+                api_key=api_key,
+                api_base=api_base,
+                acompletion=acompletion,
+                logging_obj=logging,
+                optional_params=optional_params,
+                litellm_params=litellm_params,
+                logger_fn=logger_fn,
+                timeout=timeout,  # type: ignore
+                custom_prompt_dict=custom_prompt_dict,
+                client=client,  # pass AsyncOpenAI, OpenAI client
+                organization=organization,
+                custom_llm_provider=custom_llm_provider,
+                shared_session=shared_session,
+            )
+        else:
+            response = base_llm_http_handler.completion(
+                model=model,
+                messages=messages,
+                api_base=api_base,
+                custom_llm_provider=custom_llm_provider,
+                model_response=model_response,
+                encoding=_get_encoding(),
+                logging_obj=logging,
+                optional_params=optional_params,
+                timeout=timeout,
+                litellm_params=litellm_params,
+                shared_session=shared_session,
+                acompletion=acompletion,
+                stream=stream,
+                api_key=api_key,
+                headers=headers,
+                client=client,
+                provider_config=provider_config,
+            )
         logging.post_call(
             input=messages, api_key=api_key, original_response=response
         )
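
Usage sketch (not part of the patch): with this change, a caller who
builds their own OpenAI client and hands it to litellm.completion() for
a hosted_vllm model should see requests go through that client rather
than the HTTP handler. The server URL and model name below are
placeholders, not values taken from the patch.

    import litellm
    from openai import OpenAI

    # Hypothetical local vLLM server; any OpenAI-compatible endpoint works.
    client = OpenAI(api_key="anything", base_url="http://localhost:8000/v1")

    response = litellm.completion(
        model="hosted_vllm/facebook/opt-125m",
        messages=[{"role": "user", "content": "Hello"}],
        api_base="http://localhost:8000/v1",
        client=client,  # with this patch, requests are routed through this client
    )

When client is omitted, the else branch keeps the pre-patch behavior,
including the ssl_verify handling from #19893.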