From 7a0f75f6909ca6bcf827248a23b15a93975a0d79 Mon Sep 17 00:00:00 2001 From: Datta Nimmaturi Date: Tue, 12 Aug 2025 11:49:04 +0000 Subject: [PATCH 1/5] filter vLLM standby logs --- unsloth/models/_utils.py | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py index 4426a28266..d8fe52613f 100644 --- a/unsloth/models/_utils.py +++ b/unsloth/models/_utils.py @@ -152,6 +152,29 @@ def __init__(self, text): self.text = text def filter(self, x): return not (self.text in x.getMessage()) pass +if os.environ.get('UNSLOTH_ENABLE_LOGGING', '0') != '1': + print(f'Patching vLLM worker, executor and prefix logs') + from vllm.worker.worker import logger as vllm_worker_logger + vllm_worker_logger.addFilter(HideLoggingMessage("Sleep mode freed")) + del vllm_worker_logger + + from vllm.v1.worker.gpu_worker import logger as vllm_gpu_worker_logger + vllm_gpu_worker_logger.addFilter(HideLoggingMessage("Sleep mode freed")) + del vllm_gpu_worker_logger + + from vllm.executor.executor_base import logger as vllm_executor_logger + vllm_executor_logger.addFilter(HideLoggingMessage("to fall asleep")) + vllm_executor_logger.addFilter(HideLoggingMessage("to wake up")) + del vllm_executor_logger + + from vllm.core.block.prefix_caching_block import logger as vllm_prefix_caching_logger + vllm_prefix_caching_logger.addFilter(HideLoggingMessage("reset prefix cache")) + del vllm_prefix_caching_logger + + from vllm.v1.core.block_pool import logger as vllm_block_pool_logger + vllm_block_pool_logger.addFilter(HideLoggingMessage("reset prefix cache")) + del vllm_block_pool_logger + # The speedups for torchdynamo mostly come with GPU Ampere or higher and which is not detected here. from transformers.training_args import logger as transformers_training_args_logger transformers_training_args_logger.addFilter(HideLoggingMessage("The speedups")) From 7c96971dae26c6ef92daac3ed23b09c30b9ff487 Mon Sep 17 00:00:00 2001 From: Datta Nimmaturi Date: Tue, 12 Aug 2025 13:19:25 +0000 Subject: [PATCH 2/5] safeguard standby logger patch --- unsloth/models/_utils.py | 45 +++++++++++++++++++++------------------- 1 file changed, 24 insertions(+), 21 deletions(-) diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py index d8fe52613f..cc01dad3f2 100644 --- a/unsloth/models/_utils.py +++ b/unsloth/models/_utils.py @@ -153,27 +153,30 @@ def filter(self, x): return not (self.text in x.getMessage()) pass if os.environ.get('UNSLOTH_ENABLE_LOGGING', '0') != '1': - print(f'Patching vLLM worker, executor and prefix logs') - from vllm.worker.worker import logger as vllm_worker_logger - vllm_worker_logger.addFilter(HideLoggingMessage("Sleep mode freed")) - del vllm_worker_logger - - from vllm.v1.worker.gpu_worker import logger as vllm_gpu_worker_logger - vllm_gpu_worker_logger.addFilter(HideLoggingMessage("Sleep mode freed")) - del vllm_gpu_worker_logger - - from vllm.executor.executor_base import logger as vllm_executor_logger - vllm_executor_logger.addFilter(HideLoggingMessage("to fall asleep")) - vllm_executor_logger.addFilter(HideLoggingMessage("to wake up")) - del vllm_executor_logger - - from vllm.core.block.prefix_caching_block import logger as vllm_prefix_caching_logger - vllm_prefix_caching_logger.addFilter(HideLoggingMessage("reset prefix cache")) - del vllm_prefix_caching_logger - - from vllm.v1.core.block_pool import logger as vllm_block_pool_logger - vllm_block_pool_logger.addFilter(HideLoggingMessage("reset prefix cache")) - del vllm_block_pool_logger + try: + print(f'Patching vLLM worker, executor and prefix logs') + from vllm.worker.worker import logger as vllm_worker_logger + vllm_worker_logger.addFilter(HideLoggingMessage("Sleep mode freed")) + del vllm_worker_logger + + from vllm.v1.worker.gpu_worker import logger as vllm_gpu_worker_logger + vllm_gpu_worker_logger.addFilter(HideLoggingMessage("Sleep mode freed")) + del vllm_gpu_worker_logger + + from vllm.executor.executor_base import logger as vllm_executor_logger + vllm_executor_logger.addFilter(HideLoggingMessage("to fall asleep")) + vllm_executor_logger.addFilter(HideLoggingMessage("to wake up")) + del vllm_executor_logger + + from vllm.core.block.prefix_caching_block import logger as vllm_prefix_caching_logger + vllm_prefix_caching_logger.addFilter(HideLoggingMessage("reset prefix cache")) + del vllm_prefix_caching_logger + + from vllm.v1.core.block_pool import logger as vllm_block_pool_logger + vllm_block_pool_logger.addFilter(HideLoggingMessage("reset prefix cache")) + del vllm_block_pool_logger + except Exception as e: + print(f'Unsloth: Failed to patch vLLM standby logger: {e}') # The speedups for torchdynamo mostly come with GPU Ampere or higher and which is not detected here. from transformers.training_args import logger as transformers_training_args_logger From 27e9aecfa91cfe682a7b071f813dec34bfd2be90 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Tue, 12 Aug 2025 19:41:55 -0700 Subject: [PATCH 3/5] Update unsloth/models/_utils.py --- unsloth/models/_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py index cc01dad3f2..a7336e66a0 100644 --- a/unsloth/models/_utils.py +++ b/unsloth/models/_utils.py @@ -154,8 +154,8 @@ def filter(self, x): return not (self.text in x.getMessage()) if os.environ.get('UNSLOTH_ENABLE_LOGGING', '0') != '1': try: - print(f'Patching vLLM worker, executor and prefix logs') from vllm.worker.worker import logger as vllm_worker_logger + print(f'Patching vLLM worker, executor and prefix logs') vllm_worker_logger.addFilter(HideLoggingMessage("Sleep mode freed")) del vllm_worker_logger From 9c247cd8323cd1b1022efa80ddb0418e8dfa2e50 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Tue, 12 Aug 2025 19:42:00 -0700 Subject: [PATCH 4/5] Update unsloth/models/_utils.py --- unsloth/models/_utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py index a7336e66a0..56860d4276 100644 --- a/unsloth/models/_utils.py +++ b/unsloth/models/_utils.py @@ -176,7 +176,7 @@ def filter(self, x): return not (self.text in x.getMessage()) vllm_block_pool_logger.addFilter(HideLoggingMessage("reset prefix cache")) del vllm_block_pool_logger except Exception as e: - print(f'Unsloth: Failed to patch vLLM standby logger: {e}') + pass # The speedups for torchdynamo mostly come with GPU Ampere or higher and which is not detected here. from transformers.training_args import logger as transformers_training_args_logger From c6f4d7aeb9b061eedf0d0c5e5d0ced8b73e48e52 Mon Sep 17 00:00:00 2001 From: Daniel Han Date: Tue, 12 Aug 2025 19:44:18 -0700 Subject: [PATCH 5/5] Update unsloth/models/_utils.py --- unsloth/models/_utils.py | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) diff --git a/unsloth/models/_utils.py b/unsloth/models/_utils.py index 56860d4276..d904d8674a 100644 --- a/unsloth/models/_utils.py +++ b/unsloth/models/_utils.py @@ -155,28 +155,36 @@ def filter(self, x): return not (self.text in x.getMessage()) if os.environ.get('UNSLOTH_ENABLE_LOGGING', '0') != '1': try: from vllm.worker.worker import logger as vllm_worker_logger - print(f'Patching vLLM worker, executor and prefix logs') vllm_worker_logger.addFilter(HideLoggingMessage("Sleep mode freed")) del vllm_worker_logger - + except: + pass + try: from vllm.v1.worker.gpu_worker import logger as vllm_gpu_worker_logger vllm_gpu_worker_logger.addFilter(HideLoggingMessage("Sleep mode freed")) del vllm_gpu_worker_logger - + except: + pass + try: from vllm.executor.executor_base import logger as vllm_executor_logger vllm_executor_logger.addFilter(HideLoggingMessage("to fall asleep")) vllm_executor_logger.addFilter(HideLoggingMessage("to wake up")) del vllm_executor_logger - + except: + pass + try: from vllm.core.block.prefix_caching_block import logger as vllm_prefix_caching_logger vllm_prefix_caching_logger.addFilter(HideLoggingMessage("reset prefix cache")) del vllm_prefix_caching_logger - + except: + pass + try: from vllm.v1.core.block_pool import logger as vllm_block_pool_logger vllm_block_pool_logger.addFilter(HideLoggingMessage("reset prefix cache")) del vllm_block_pool_logger - except Exception as e: + except: pass +pass # The speedups for torchdynamo mostly come with GPU Ampere or higher and which is not detected here. from transformers.training_args import logger as transformers_training_args_logger