diff --git a/vllm/compilation/cuda_graph.py b/vllm/compilation/cuda_graph.py
index 78841866f752..00bf4bbc71f1 100644
--- a/vllm/compilation/cuda_graph.py
+++ b/vllm/compilation/cuda_graph.py
@@ -189,6 +189,7 @@ def __init__(
 
         self.first_run_finished = False
         self.is_debugging_mode = envs.VLLM_LOGGING_LEVEL == "DEBUG"
+        self._runnable_str = str(runnable) if self.is_debugging_mode else None
 
         # assert runtime_mode is not NONE(no cudagraph), otherwise, we don't
         # need to initialize a CUDAGraphWrapper.
@@ -211,10 +212,12 @@ def __getattr__(self, key: str) -> Any:
         # allow accessing the attributes of the runnable.
         if hasattr(self.runnable, key):
             return getattr(self.runnable, key)
-        raise AttributeError(
-            f"Attribute {key} not exists in the runnable of "
-            f"cudagraph wrapper: {self.runnable}"
-        )
+        # Always name the missing attribute; the runnable's string form is
+        # appended only in debug mode.
+        msg = f"Attribute {key} not exists in the runnable of cudagraph wrapper"
+        if self.is_debugging_mode:
+            msg = f"{msg}: {self._runnable_str}"
+        raise AttributeError(msg)
 
     def unwrap(self) -> Callable[..., Any]:
         # in case we need to access the original runnable.
diff --git a/vllm/v1/worker/gpu_ubatch_wrapper.py b/vllm/v1/worker/gpu_ubatch_wrapper.py
index 64856052fcfd..323b96347e00 100644
--- a/vllm/v1/worker/gpu_ubatch_wrapper.py
+++ b/vllm/v1/worker/gpu_ubatch_wrapper.py
@@ -119,6 +119,8 @@ def __init__(
 
         self.sm_control = self._create_sm_control_context(vllm_config)
         self.device = device
+        self.is_debugging_mode = envs.VLLM_LOGGING_LEVEL == "DEBUG"
+        self._runnable_str = str(runnable) if self.is_debugging_mode else None
 
     @property
     def graph_pool(self):
@@ -170,10 +172,12 @@ def __getattr__(self, key: str):
         # allow accessing the attributes of the runnable.
         if hasattr(self.runnable, key):
             return getattr(self.runnable, key)
-        raise AttributeError(
-            f"Attribute {key} not exists in the runnable of "
-            f"cudagraph wrapper: {self.runnable}"
-        )
+        # Always name the missing attribute; the runnable's string form is
+        # appended only in debug mode.
+        msg = f"Attribute {key} not exists in the runnable of cudagraph wrapper"
+        if self.is_debugging_mode:
+            msg = f"{msg}: {self._runnable_str}"
+        raise AttributeError(msg)
 
     def unwrap(self) -> Callable:
         # in case we need to access the original runnable.