diff --git a/vllm/compilation/backends.py b/vllm/compilation/backends.py index c3900ffc67d3..501436275a09 100644 --- a/vllm/compilation/backends.py +++ b/vllm/compilation/backends.py @@ -292,7 +292,6 @@ def compile( "from the cache, took %.3f s", str(compile_range), elapsed, - scope="local", ) return compiled_graph @@ -377,7 +376,6 @@ def autograd_cache_key(*args, **kwargs): logger.info_once( "Cache the graph of compile range %s for later use", str(compile_range), - scope="local", ) logger.debug_once( "Store the %s-th graph for compile range%s from %s via handle %s", @@ -385,7 +383,6 @@ def autograd_cache_key(*args, **kwargs): str(compile_range), self.compiler.name, handle, - scope="local", ) # after compiling the last graph, record the end time @@ -399,7 +396,6 @@ def autograd_cache_key(*args, **kwargs): "Compiling a graph for compile range %s takes %.2f s", str(compile_range), elapsed, - scope="local", ) return compiled_graph @@ -1072,12 +1068,11 @@ def __call__(self, graph: fx.GraphModule, example_inputs: Sequence[Any]) -> Any: disable_cache = disable_cache or is_ngram_gpu_enabled if disable_cache: - logger.info_once("vLLM's torch.compile cache is disabled.", scope="local") + logger.info_once("vLLM's torch.compile cache is disabled.") else: logger.info_once( "Using cache directory: %s for vLLM's torch.compile", local_cache_dir, - scope="local", ) self.compiler_manager.initialize_cache( @@ -1134,9 +1129,7 @@ def __call__(self, graph: fx.GraphModule, example_inputs: Sequence[Any]) -> Any: from .monitor import torch_compile_start_time dynamo_time = time.perf_counter() - torch_compile_start_time - logger.info_once( - "Dynamo bytecode transform time: %.2f s", dynamo_time, scope="local" - ) + logger.info_once("Dynamo bytecode transform time: %.2f s", dynamo_time) if self.is_encoder: self.compilation_config.encoder_compilation_time += dynamo_time else: @@ -1215,7 +1208,6 @@ def __call__(self, graph: fx.GraphModule, example_inputs: Sequence[Any]) -> Any: logger.info_once( "Saved compiler manager cache in %.2f seconds.", elapsed, - scope="local", ) from torch._guards import detect_fake_mode @@ -1254,9 +1246,7 @@ def __call__(self, graph: fx.GraphModule, example_inputs: Sequence[Any]) -> Any: with open(graph_path, "w") as f: f.write(src) - logger.debug_once( - "Computation graph saved to %s", graph_path, scope="local" - ) + logger.debug_once("Computation graph saved to %s", graph_path) self._called = True graph_to_serialize = ( diff --git a/vllm/compilation/decorators.py b/vllm/compilation/decorators.py index 79daf00de66b..a9ecb321cb38 100644 --- a/vllm/compilation/decorators.py +++ b/vllm/compilation/decorators.py @@ -665,7 +665,6 @@ def save_aot_compiled_function(self: type[_T]) -> None: logger.info_once( "saved AOT compiled function to %s", self._aot_compilation_path, - scope="local", ) except Exception as e: logger.warning( diff --git a/vllm/compilation/monitor.py b/vllm/compilation/monitor.py index f584f526f08f..a15f1d5fe73c 100644 --- a/vllm/compilation/monitor.py +++ b/vllm/compilation/monitor.py @@ -45,7 +45,7 @@ def monitor_torch_compile( else: total_compile_time = time.perf_counter() - torch_compile_start_time if compilation_config.mode == CompilationMode.VLLM_COMPILE: - logger.info_once(message, total_compile_time, scope="local") + logger.info_once(message, total_compile_time) finally: if depyf_cm is not None: try: @@ -76,7 +76,6 @@ def monitor_profiling_run() -> Generator[None, None, None]: logger.info_once( "Initial profiling/warmup run took %.2f s", elapsed, - scope="local", ) diff 
--git a/vllm/config/scheduler.py b/vllm/config/scheduler.py index b9a48144ded4..fb6951ea7dd1 100644 --- a/vllm/config/scheduler.py +++ b/vllm/config/scheduler.py @@ -239,7 +239,6 @@ def __post_init__(self, max_model_len: int, is_encoder_decoder: bool) -> None: logger.info_once( "Chunked prefill is enabled with max_num_batched_tokens=%d.", self.max_num_batched_tokens, - scope="local", ) if self.max_num_partial_prefills > 1: diff --git a/vllm/config/vllm.py b/vllm/config/vllm.py index 26506642561f..0726e93d2fef 100644 --- a/vllm/config/vllm.py +++ b/vllm/config/vllm.py @@ -716,9 +716,7 @@ def __post_init__(self): self.instance_id = f"{time.time_ns()}" if self.performance_mode != "balanced": - logger.info_once( - "Performance mode set to '%s'.", self.performance_mode, scope="local" - ) + logger.info_once("Performance mode set to '%s'.", self.performance_mode) self.try_verify_and_update_config() @@ -818,7 +816,6 @@ def __post_init__(self): "Async scheduling not supported with %s-based " "speculative decoding and will be disabled.", self.speculative_config.method, - scope="local", ) self.scheduler_config.async_scheduling = False elif ( @@ -828,7 +825,6 @@ def __post_init__(self): logger.warning_once( "Async scheduling is not compatible with " "disable_padded_drafter_batch=True and will be disabled.", - scope="local", ) self.scheduler_config.async_scheduling = False elif not executor_supports_async_sched: @@ -836,7 +832,6 @@ def __post_init__(self): "Async scheduling will be disabled because it is not supported " "with the `%s` distributed executor backend. ", executor_backend, - scope="local", ) self.scheduler_config.async_scheduling = False else: @@ -855,7 +850,6 @@ def __post_init__(self): logger.info_once( "Disabling NCCL for DP synchronization " "when using async scheduling.", - scope="local", ) self.parallel_config.disable_nccl_for_dp_synchronization = True else: @@ -870,7 +864,6 @@ def __post_init__(self): logger.warning_once( "Disabling cascade attention (not yet compatible with " "async speculative decoding).", - scope="local", ) self.model_config.disable_cascade_attn = True @@ -1231,7 +1224,6 @@ def has_blocked_weights(): self.model_config.disable_cascade_attn = True logger.warning_once( "Disabling cascade attention when VLLM_BATCH_INVARIANT is enabled.", - scope="local", ) if self.parallel_config.use_ubatching: @@ -1418,7 +1410,6 @@ def _set_max_num_scheduled_tokens(self): " performance. 
Consider increasing max_num_batched_tokens to" " accommodate the additional draft token slots, or decrease" " num_speculative_tokens or max_num_seqs.", - scope="local", ) max_num_scheduled_tokens = self.scheduler_config.max_num_scheduled_tokens diff --git a/vllm/distributed/device_communicators/pynccl.py b/vllm/distributed/device_communicators/pynccl.py index 6ac3b9ea3c7c..990c808a9831 100644 --- a/vllm/distributed/device_communicators/pynccl.py +++ b/vllm/distributed/device_communicators/pynccl.py @@ -108,9 +108,7 @@ def __init__( if self.rank == 0: # get the unique id from NCCL self.unique_id = self.nccl.ncclGetUniqueId() - logger.info_once( - "vLLM is using nccl==%s", self.nccl.ncclGetVersion(), scope="local" - ) + logger.info_once("vLLM is using nccl==%s", self.nccl.ncclGetVersion()) else: # construct an empty unique id self.unique_id = ncclUniqueId() diff --git a/vllm/engine/arg_utils.py b/vllm/engine/arg_utils.py index 7028b12dab32..e6528849b219 100644 --- a/vllm/engine/arg_utils.py +++ b/vllm/engine/arg_utils.py @@ -2254,7 +2254,6 @@ def _set_default_chunked_prefill_and_prefix_caching_args( "This model does not officially support disabling chunked prefill. " "Disabling this manually may cause the engine to crash " "or produce incorrect outputs.", - scope="local", ) elif ( model_config.runner_type == "pooling" @@ -2265,7 +2264,6 @@ def _set_default_chunked_prefill_and_prefix_caching_args( "This model does not officially support chunked prefill. " "Enabling this manually may cause the engine to crash " "or produce incorrect outputs.", - scope="local", ) if self.enable_prefix_caching is None: @@ -2284,7 +2282,6 @@ def _set_default_chunked_prefill_and_prefix_caching_args( "This model does not officially support prefix caching. " "Enabling this manually may cause the engine to crash " "or produce incorrect outputs.", - scope="local", ) # Disable chunked prefill and prefix caching for: diff --git a/vllm/lora/model_manager.py b/vllm/lora/model_manager.py index 3b58031dcbab..52ff8ebc91f3 100644 --- a/vllm/lora/model_manager.py +++ b/vllm/lora/model_manager.py @@ -387,7 +387,6 @@ def _parent_module(module_name: str) -> str: "LoRA is not supported for non-gated MoE gate module." 
" %s will be ignored.", module_name, - scope="local", ) continue diff --git a/vllm/model_executor/layers/attention/attention.py b/vllm/model_executor/layers/attention/attention.py index d229e32be758..61fb687e4637 100644 --- a/vllm/model_executor/layers/attention/attention.py +++ b/vllm/model_executor/layers/attention/attention.py @@ -332,7 +332,6 @@ def __init__( logger.warning_once( "Disabling prefix caching for FLASHINFER/TRITON_MLA " "with batch invariance, as it is not yet supported.", - scope="local", ) cache_config.enable_prefix_caching = False diff --git a/vllm/model_executor/layers/attention/mla_attention.py b/vllm/model_executor/layers/attention/mla_attention.py index 9d6ae6bf601e..5c7dc60fe15c 100644 --- a/vllm/model_executor/layers/attention/mla_attention.py +++ b/vllm/model_executor/layers/attention/mla_attention.py @@ -427,7 +427,6 @@ def __init__( logger.warning_once( "Disabling prefix caching for TRITON_MLA / FLASHINFER " "with batch invariance, as it is not yet supported.", - scope="local", ) cache_config.enable_prefix_caching = False @@ -1523,9 +1522,7 @@ def determine_prefill_query_data_type( if use_fp8: fp8_dtype = current_platform.fp8_dtype() - logger.info_once( - "FP8 prefill attention enabled: query data type is FP8", scope="local" - ) + logger.info_once("FP8 prefill attention enabled: query data type is FP8") return fp8_dtype elif vllm_config.attention_config.use_prefill_query_quantization: logger.info_once( @@ -1533,7 +1530,6 @@ def determine_prefill_query_data_type( " use_prefill_query_quantization is enabled. Please" " ensure that --kv-cache-dtype is set to fp8 and your prefill" " backend is compatible with FP8 attention.", - scope="local", ) return model_dtype elif ( @@ -1547,7 +1543,6 @@ def determine_prefill_query_data_type( "prefill latency. To enable, add: " '--attention-config \'{"use_prefill_query_quantization"' ": true}'", - scope="local", ) return model_dtype @@ -2225,21 +2220,19 @@ def __init__( ) if use_trtllm_ragged_deepseek_prefill(): - logger.info_once( - "Using TRT-LLM ragged DeepSeek prefill for MLA", scope="local" - ) + logger.info_once("Using TRT-LLM ragged DeepSeek prefill for MLA") self._run_prefill_context_chunk = ( self._run_prefill_context_chunk_trtllm_ragged ) self._run_prefill_new_tokens = self._run_prefill_new_tokens_trtllm_ragged self._pad_v = False elif use_flashinfer_prefill(): - logger.info_once("Using FlashInfer prefill for MLA", scope="local") + logger.info_once("Using FlashInfer prefill for MLA") self._run_prefill_context_chunk = self._run_prefill_context_chunk_fi self._run_prefill_new_tokens = self._run_prefill_new_tokens_fi self._pad_v = False elif use_cudnn_prefill(): - logger.info_once("Using CUDNN prefill for MLA", scope="local") + logger.info_once("Using CUDNN prefill for MLA") self._run_prefill_context_chunk = self._run_prefill_context_chunk_cudnn self._run_prefill_new_tokens = self._run_prefill_new_tokens_cudnn self._pad_v = False @@ -2250,7 +2243,7 @@ def __init__( "available. Please install flash_attn or use " "--attention-backend ROCM_AITER_MLA." 
) - logger.info_once("Using FlashAttention prefill for MLA", scope="local") + logger.info_once("Using FlashAttention prefill for MLA") self._run_prefill_context_chunk = self._run_prefill_context_chunk_fa self._run_prefill_new_tokens = self._run_prefill_new_tokens_fa diff --git a/vllm/model_executor/layers/attention/mm_encoder_attention.py b/vllm/model_executor/layers/attention/mm_encoder_attention.py index 6755e9af9e65..46d461c38b3f 100644 --- a/vllm/model_executor/layers/attention/mm_encoder_attention.py +++ b/vllm/model_executor/layers/attention/mm_encoder_attention.py @@ -227,9 +227,7 @@ def __init__( if self.attn_backend == AttentionBackendEnum.FLASHINFER: _get_flashinfer_workspace_buffer() - logger.info_once( - f"Using {self.attn_backend} for MMEncoderAttention.", scope="local" - ) + logger.info_once(f"Using {self.attn_backend} for MMEncoderAttention.") @classmethod def enabled(cls) -> bool: diff --git a/vllm/model_executor/layers/batch_invariant.py b/vllm/model_executor/layers/batch_invariant.py index 4a88421e3b51..152333beecb7 100644 --- a/vllm/model_executor/layers/batch_invariant.py +++ b/vllm/model_executor/layers/batch_invariant.py @@ -1020,7 +1020,7 @@ def override_envs_for_invariance( "You are using a non-decode-invariant form of batch invariance. " "This will not be invariant between prefill and decode." ) - logger.warning_once(warning, scope="local") + logger.warning_once(warning) os.environ["VLLM_ALLREDUCE_USE_SYMM_MEM"] = "0" os.environ["CUBLAS_WORKSPACE_CONFIG"] = ":4096:8" diff --git a/vllm/model_executor/layers/fused_moe/experts/batched_deep_gemm_moe.py b/vllm/model_executor/layers/fused_moe/experts/batched_deep_gemm_moe.py index fad39b3e9d4a..7bd383b9cdac 100644 --- a/vllm/model_executor/layers/fused_moe/experts/batched_deep_gemm_moe.py +++ b/vllm/model_executor/layers/fused_moe/experts/batched_deep_gemm_moe.py @@ -369,7 +369,6 @@ def estimate_expected_m( logger.warning_once( "DPMetadata unavailable. Defaulting expected_m to " f"{max_tokens_per_expert}.", - scope="local", ) return max_tokens_per_expert diff --git a/vllm/model_executor/layers/fused_moe/fused_moe.py b/vllm/model_executor/layers/fused_moe/fused_moe.py index bf083eb9b55d..cf53907e2c3f 100644 --- a/vllm/model_executor/layers/fused_moe/fused_moe.py +++ b/vllm/model_executor/layers/fused_moe/fused_moe.py @@ -1091,7 +1091,6 @@ def get_moe_configs( "Using default MoE config. Performance might be sub-optimal! 
" "Config file not found at %s", ", ".join(config_file_paths), - scope="local", ) return None diff --git a/vllm/model_executor/layers/fused_moe/nixl_ep_prepare_finalize.py b/vllm/model_executor/layers/fused_moe/nixl_ep_prepare_finalize.py index dbc54e2c9def..a1068a752427 100644 --- a/vllm/model_executor/layers/fused_moe/nixl_ep_prepare_finalize.py +++ b/vllm/model_executor/layers/fused_moe/nixl_ep_prepare_finalize.py @@ -123,7 +123,6 @@ def post_init_setup(self, fused_experts: mk.FusedMoEExperts): "NixlEPPrepareAndFinalize is setup to dispatch raw/unquantized " f"activations despite ({fused_experts.__class__.__name__}) being able " "to support quantized activations.", - scope="local", ) def num_dispatchers(self) -> int: diff --git a/vllm/model_executor/layers/fused_moe/oracle/fp8.py b/vllm/model_executor/layers/fused_moe/oracle/fp8.py index 4420bb38731a..584c2bf79285 100644 --- a/vllm/model_executor/layers/fused_moe/oracle/fp8.py +++ b/vllm/model_executor/layers/fused_moe/oracle/fp8.py @@ -266,7 +266,7 @@ def _return_or_raise( k_cls, config, weight_key, activation_key, activation_format ) if supported: - logger.info_once(_make_log_backend(backend), scope="local") + logger.info_once(_make_log_backend(backend)) return backend, k_cls raise ValueError(_make_log_unsupported(backend, reason)) @@ -337,12 +337,10 @@ def _return_or_raise( ) if supported: - logger.info_once(_make_log_backend(backend), scope="local") + logger.info_once(_make_log_backend(backend)) return backend, k_cls else: - logger.debug_once( - _make_log_unsupported(backend, reason), scope="local" - ) + logger.debug_once(_make_log_unsupported(backend, reason)) raise NotImplementedError( "Found VLLM_USE_FLASHINFER_MOE_FP8=1, but no " @@ -396,10 +394,10 @@ def _return_or_raise( activation_format, ) if supported: - logger.info_once(_make_log_backend(backend), scope="local") + logger.info_once(_make_log_backend(backend)) return backend, k_cls else: - logger.debug_once(_make_log_unsupported(backend, reason), scope="local") + logger.debug_once(_make_log_unsupported(backend, reason)) # TODO(rob): per discussion with TPU team, we need a way to register # MoE backends by OOT plugins, rather than having an explicit list @@ -580,7 +578,7 @@ def make_fp8_moe_kernel( ) assert prepare_finalize is not None - logger.info_once("Using %s", prepare_finalize.__class__.__name__, scope="local") + logger.info_once("Using %s", prepare_finalize.__class__.__name__) # Create Experts. 
if prepare_finalize.activation_format == mk.FusedMoEActivationFormat.BatchedExperts: diff --git a/vllm/model_executor/layers/fused_moe/oracle/int8.py b/vllm/model_executor/layers/fused_moe/oracle/int8.py index efa2792b420b..cdb1be108b5d 100644 --- a/vllm/model_executor/layers/fused_moe/oracle/int8.py +++ b/vllm/model_executor/layers/fused_moe/oracle/int8.py @@ -117,7 +117,7 @@ def _return_or_raise( k_cls, config, weight_key, activation_key, activation_format ) if supported: - logger.info_once(_make_log_backend(backend), scope="local") + logger.info_once(_make_log_backend(backend)) return backend, k_cls raise ValueError(_make_log_unsupported(backend, reason)) @@ -138,10 +138,10 @@ def _return_or_raise( activation_format, ) if supported: - logger.info_once(_make_log_backend(backend), scope="local") + logger.info_once(_make_log_backend(backend)) return backend, k_cls else: - logger.debug_once(_make_log_unsupported(backend, reason), scope="local") + logger.debug_once(_make_log_unsupported(backend, reason)) raise NotImplementedError( "No Int8 MoE backend supports the deployment configuration." @@ -193,7 +193,7 @@ def make_int8_moe_kernel( ) assert prepare_finalize is not None - logger.info_once("Using %s", prepare_finalize.__class__.__name__, scope="local") + logger.info_once("Using %s", prepare_finalize.__class__.__name__) # Create Experts. if prepare_finalize.activation_format == mk.FusedMoEActivationFormat.BatchedExperts: diff --git a/vllm/model_executor/layers/fused_moe/oracle/mxfp4.py b/vllm/model_executor/layers/fused_moe/oracle/mxfp4.py index 13d7a902c30e..6306d0e2e9d7 100644 --- a/vllm/model_executor/layers/fused_moe/oracle/mxfp4.py +++ b/vllm/model_executor/layers/fused_moe/oracle/mxfp4.py @@ -269,7 +269,7 @@ def _return_or_raise( k_cls, config, weight_key, activation_key, activation_format ) if supported: - logger.info_once(_make_log_backend(backend), scope="local") + logger.info_once(_make_log_backend(backend)) return backend, k_cls raise ValueError(_make_log_unsupported(backend, reason)) @@ -363,10 +363,10 @@ def _return_or_raise( k_cls, config, kMxfp4Static, activation_key, activation_format ) if supported: - logger.info_once(_make_log_backend(backend), scope="local") + logger.info_once(_make_log_backend(backend)) return backend, k_cls else: - logger.debug_once(_make_log_unsupported(backend, reason), scope="local") + logger.debug_once(_make_log_unsupported(backend, reason)) if current_platform.is_xpu(): backend = Mxfp4MoeBackend.XPU @@ -861,7 +861,7 @@ def make_mxfp4_moe_kernel( ) assert prepare_finalize is not None - logger.info_once("Using %s", prepare_finalize.__class__.__name__, scope="local") + logger.info_once("Using %s", prepare_finalize.__class__.__name__) # Create Experts. 
if prepare_finalize.activation_format == mk.FusedMoEActivationFormat.BatchedExperts: diff --git a/vllm/model_executor/layers/fused_moe/oracle/nvfp4.py b/vllm/model_executor/layers/fused_moe/oracle/nvfp4.py index 6d0b66cb9f53..724f6d5399bf 100644 --- a/vllm/model_executor/layers/fused_moe/oracle/nvfp4.py +++ b/vllm/model_executor/layers/fused_moe/oracle/nvfp4.py @@ -252,12 +252,10 @@ def _return_or_raise( activation_format, ) if supported: - logger.info_once(_make_log_backend(backend), scope="local") + logger.info_once(_make_log_backend(backend)) return backend, k_cls else: - logger.debug_once( - _make_log_unsupported(backend, reason), scope="local" - ) + logger.debug_once(_make_log_unsupported(backend, reason)) raise NotImplementedError( "Found VLLM_USE_FLASHINFER_MOE_FP4=1, but no " @@ -282,10 +280,10 @@ def _return_or_raise( ) if supported: - logger.info_once(_make_log_backend(backend), scope="local") + logger.info_once(_make_log_backend(backend)) return backend, k_cls else: - logger.debug_once(_make_log_unsupported(backend, reason), scope="local") + logger.debug_once(_make_log_unsupported(backend, reason)) raise NotImplementedError( "No NvFp4 MoE backend supports the deployment configuration." diff --git a/vllm/model_executor/layers/fused_moe/oracle/unquantized.py b/vllm/model_executor/layers/fused_moe/oracle/unquantized.py index af7cb7baf963..cdfd6bb8c027 100644 --- a/vllm/model_executor/layers/fused_moe/oracle/unquantized.py +++ b/vllm/model_executor/layers/fused_moe/oracle/unquantized.py @@ -210,7 +210,7 @@ def _return_or_raise( k_cls, config, None, None, activation_format ) if supported: - logger.info_once(_make_log_backend(backend), scope="local") + logger.info_once(_make_log_backend(backend)) return backend, k_cls raise ValueError(_make_log_unsupported(backend, reason)) @@ -271,12 +271,10 @@ def _return_or_raise( k_cls, moe_config, None, None, activation_format ) if supported: - logger.info_once(_make_log_backend(backend), scope="local") + logger.info_once(_make_log_backend(backend)) return backend, k_cls else: - logger.debug_once( - _make_log_unsupported(backend, reason), scope="local" - ) + logger.debug_once(_make_log_unsupported(backend, reason)) raise NotImplementedError( "Found VLLM_USE_FLASHINFER_MOE_FP16=1, but no " @@ -298,10 +296,10 @@ def _return_or_raise( k_cls, moe_config, None, None, activation_format ) if supported: - logger.info_once(_make_log_backend(backend), scope="local") + logger.info_once(_make_log_backend(backend)) return backend, k_cls - logger.debug_once(_make_log_unsupported(backend, reason), scope="local") + logger.debug_once(_make_log_unsupported(backend, reason)) raise NotImplementedError( "No Unquantized MoE backend supports the deployment configuration." 
@@ -355,7 +353,7 @@ def make_unquantized_moe_kernel( ) assert prepare_finalize is not None - logger.info_once("Using %s", prepare_finalize.__class__.__name__, scope="local") + logger.info_once("Using %s", prepare_finalize.__class__.__name__) # Create Experts if prepare_finalize.activation_format == mk.FusedMoEActivationFormat.BatchedExperts: diff --git a/vllm/model_executor/layers/fused_moe/prepare_finalize/deepep_ll.py b/vllm/model_executor/layers/fused_moe/prepare_finalize/deepep_ll.py index 0c6e32ae4a53..058d09d23bf2 100644 --- a/vllm/model_executor/layers/fused_moe/prepare_finalize/deepep_ll.py +++ b/vllm/model_executor/layers/fused_moe/prepare_finalize/deepep_ll.py @@ -135,7 +135,6 @@ def post_init_setup(self, fused_experts: mk.FusedMoEExperts): "DeepEPLLPrepareAndFinalize is setup to dispatch raw/unquantized " f"activations despite ({fused_experts.__class__.__name__}) being able " "to support quantized activations.", - scope="local", ) def num_dispatchers(self) -> int: diff --git a/vllm/model_executor/layers/fused_moe/runner/shared_experts.py b/vllm/model_executor/layers/fused_moe/runner/shared_experts.py index c105badabcb4..227014e23973 100644 --- a/vllm/model_executor/layers/fused_moe/runner/shared_experts.py +++ b/vllm/model_executor/layers/fused_moe/runner/shared_experts.py @@ -69,16 +69,14 @@ def __init__( # TODO: Remove this after more extensive testings with TP/DP # and other execution modes if envs.VLLM_DISABLE_SHARED_EXPERTS_STREAM: - logger.debug_once("Disabling MoE shared_experts cuda stream", scope="local") + logger.debug_once("Disabling MoE shared_experts cuda stream") self._stream = None else: # TODO(rob): enable shared expert overlap with non-cuda-alike. # aux_stream() returns None on non-cuda-alike platforms. self._stream = aux_stream() if self._stream is not None: - logger.debug_once( - "Enabled separate cuda stream for MoE shared_experts", scope="local" - ) + logger.debug_once("Enabled separate cuda stream for MoE shared_experts") @property def _disable_shared_experts_overlap(self) -> bool: diff --git a/vllm/model_executor/layers/mamba/gdn_linear_attn.py b/vllm/model_executor/layers/mamba/gdn_linear_attn.py index c74ca13024a8..7a0b54335baa 100644 --- a/vllm/model_executor/layers/mamba/gdn_linear_attn.py +++ b/vllm/model_executor/layers/mamba/gdn_linear_attn.py @@ -143,15 +143,14 @@ def __init__(self) -> None: use_flashinfer = supports_flashinfer if use_flashinfer: - logger.info_once("Using FlashInfer GDN prefill kernel", scope="local") + logger.info_once("Using FlashInfer GDN prefill kernel") logger.info_once( "FlashInfer GDN prefill kernel is JIT-compiled; first run may " "take a while to compile. 
Set `--gdn-prefill-backend triton` to " "avoid JIT compile time.", - scope="local", ) else: - logger.info_once("Using Triton/FLA GDN prefill kernel", scope="local") + logger.info_once("Using Triton/FLA GDN prefill kernel") self._forward_method = ( self.forward_cuda if use_flashinfer else self.forward_native diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe/compressed_tensors_moe_w4a4_mxfp4.py b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe/compressed_tensors_moe_w4a4_mxfp4.py index 57ebb961d487..9d3e0e7a787f 100644 --- a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe/compressed_tensors_moe_w4a4_mxfp4.py +++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe/compressed_tensors_moe_w4a4_mxfp4.py @@ -44,10 +44,10 @@ def __init__(self, moe): self.use_cutlass_mxfp4 = CutlassExpertsMxfp4._supports_current_device() self.experts_cls: type[mk.FusedMoEExperts] if self.use_cutlass_mxfp4: - logger.info_once("Using CutlassExpertsMxfp4 for MXFP4 MoE", scope="local") + logger.info_once("Using CutlassExpertsMxfp4 for MXFP4 MoE") self.experts_cls = CutlassExpertsMxfp4 else: - logger.info_once("Using MarlinExperts for MXFP4 MoE", scope="local") + logger.info_once("Using MarlinExperts for MXFP4 MoE") self.experts_cls = MarlinExperts def create_weights( diff --git a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe/compressed_tensors_moe_wna16_marlin.py b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe/compressed_tensors_moe_wna16_marlin.py index 216eed6372a9..81b7efaa6d7e 100644 --- a/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe/compressed_tensors_moe_wna16_marlin.py +++ b/vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe/compressed_tensors_moe_wna16_marlin.py @@ -87,7 +87,6 @@ def __init__( logger.info_once( f"Using {self.kernel_backend} backend for WNA16 MoE " f"(group_size={self.group_size}, num_bits={self.num_bits})", - scope="local", ) def get_weight_shape( diff --git a/vllm/model_executor/layers/quantization/mxfp4.py b/vllm/model_executor/layers/quantization/mxfp4.py index 019bb45d65dc..b53c7cc9ac1a 100644 --- a/vllm/model_executor/layers/quantization/mxfp4.py +++ b/vllm/model_executor/layers/quantization/mxfp4.py @@ -83,7 +83,6 @@ def get_quant_method( logger.debug_once( "MXFP4 linear layer is not implemented - falling back to " "UnquantizedLinearMethod.", - scope="local", ) return UnquantizedLinearMethod() elif isinstance(layer, FusedMoE): @@ -92,7 +91,6 @@ def get_quant_method( logger.debug_once( "MXFP4 attention layer is not implemented. 
" "Skipping quantization for this layer.", - scope="local", ) return None diff --git a/vllm/model_executor/layers/quantization/utils/flashinfer_utils.py b/vllm/model_executor/layers/quantization/utils/flashinfer_utils.py index 32c7a772f3fd..973f759698f0 100644 --- a/vllm/model_executor/layers/quantization/utils/flashinfer_utils.py +++ b/vllm/model_executor/layers/quantization/utils/flashinfer_utils.py @@ -111,7 +111,6 @@ def get_flashinfer_moe_backend() -> FlashinferMoeBackend: logger.info_once( "Flashinfer TRTLLM MOE backend is only supported on " "SM100 and later, using CUTLASS backend instead", - scope="local", ) return FlashinferMoeBackend.CUTLASS return backend_map[flashinfer_moe_backend] @@ -239,7 +238,6 @@ def align_fp4_moe_weights_for_fi( "Padding intermediate size from %d to %d for up/down projection weights.", intermediate, padded_intermediate, - scope="local", ) up_mult = 2 if is_act_and_mul else 1 @@ -285,7 +283,6 @@ def align_trtllm_fp4_moe_hidden_dim_for_fi( "performance degradation.", hidden_size, padded_hidden_size, - scope="local", ) padded_w13 = w13.new_zeros((num_experts, gate_up_dim, padded_hidden_size // 2)) @@ -331,7 +328,6 @@ def align_fp8_moe_weights_for_fi( "Padding intermediate size from %d to %d for up/down projection weights.", intermediate, padded_intermediate, - scope="local", ) up_mult = 2 if is_act_and_mul else 1 diff --git a/vllm/model_executor/model_loader/base_loader.py b/vllm/model_executor/model_loader/base_loader.py index d6c38664fde6..fb2f77d1b112 100644 --- a/vllm/model_executor/model_loader/base_loader.py +++ b/vllm/model_executor/model_loader/base_loader.py @@ -70,7 +70,6 @@ def load_model( logger.debug_once( "Peak GPU memory after loading weights: %s GiB", format_gib(peak_memory), - scope="local", ) # Process weights into kernel format. Note that when using online diff --git a/vllm/model_executor/model_loader/default_loader.py b/vllm/model_executor/model_loader/default_loader.py index 5c9c97f4b64a..037195b9063a 100644 --- a/vllm/model_executor/model_loader/default_loader.py +++ b/vllm/model_executor/model_loader/default_loader.py @@ -384,7 +384,6 @@ def load_weights(self, model: nn.Module, model_config: ModelConfig) -> None: logger.info_once( "Loading weights took %.2f seconds", self.counter_after_loading_weights - self.counter_before_loading_weights, - scope="local", ) # We only enable strict check for non-quantized models # that have loaded weights tracking currently. 
diff --git a/vllm/model_executor/model_loader/sharded_state_loader.py b/vllm/model_executor/model_loader/sharded_state_loader.py index a87731e8bc0b..87b4b72db2a1 100644 --- a/vllm/model_executor/model_loader/sharded_state_loader.py +++ b/vllm/model_executor/model_loader/sharded_state_loader.py @@ -157,7 +157,6 @@ def load_weights(self, model: nn.Module, model_config: ModelConfig) -> None: logger.info_once( "Loading weights took %.2f seconds", counter_after_loading_weights - counter_before_loading_weights, - scope="local", ) if state_dict: raise ValueError(f"Missing keys {tuple(state_dict)} in loaded state!") diff --git a/vllm/model_executor/offloader/base.py b/vllm/model_executor/offloader/base.py index b8c1b6cfa48a..ceff60cd4cde 100644 --- a/vllm/model_executor/offloader/base.py +++ b/vllm/model_executor/offloader/base.py @@ -118,11 +118,9 @@ def set_offloader(instance: BaseOffloader) -> None: global _instance _instance = instance if isinstance(instance, NoopOffloader): - logger.debug_once( - "Offloader set to NoopOffloader (no offloading).", scope="local" - ) + logger.debug_once("Offloader set to NoopOffloader (no offloading).") else: - logger.info_once("Offloader set to %s", type(instance).__name__, scope="local") + logger.info_once("Offloader set to %s", type(instance).__name__) def create_offloader(offload_config: "OffloadConfig") -> BaseOffloader: diff --git a/vllm/platforms/cuda.py b/vllm/platforms/cuda.py index d79d31918204..4f9b9d7bf234 100644 --- a/vllm/platforms/cuda.py +++ b/vllm/platforms/cuda.py @@ -369,7 +369,6 @@ def get_attn_backend_cls( "Using %s attention backend out of potential backends: %s.", selected_backend.name, "[" + ", ".join(f"'{b[0].name}'" for b in valid_backends_priorities) + "]", - scope="local", ) return selected_backend.get_path() @@ -423,7 +422,6 @@ def get_vit_attn_backend( if is_backend_supported: logger.info_once( f"Using backend {vit_attn_backend} for vit attention", - scope="local", ) return vit_attn_backend except ImportError: diff --git a/vllm/profiler/wrapper.py b/vllm/profiler/wrapper.py index 7cd4d8874df2..201b45078492 100644 --- a/vllm/profiler/wrapper.py +++ b/vllm/profiler/wrapper.py @@ -63,7 +63,7 @@ def _call_stop(self) -> None: """Call _stop with error handling but no safeguards.""" try: self._stop() - logger.info_once("Profiler stopped successfully.", scope="local") + logger.info_once("Profiler stopped successfully.") except Exception as e: logger.warning("Failed to stop profiler: %s", e) self._running = False # Always mark as not running, assume stop worked @@ -93,7 +93,7 @@ def step(self) -> None: and self._delay_iters > 0 and self._active_iteration_count == self._delay_iters ): - logger.info_once("Starting profiler after delay...", scope="local") + logger.info_once("Starting profiler after delay...") self._call_start() # Call profiler step for schedule-based profiling @@ -109,9 +109,7 @@ def step(self) -> None: # Automatically stop the profiler after max iters # will be marked as not running, but leave as active so that stop # can clean up properly - logger.info_once( - "Max profiling iterations reached. Stopping profiler...", scope="local" - ) + logger.info_once("Max profiling iterations reached. 
Stopping profiler...") self._call_stop() return @@ -141,7 +139,7 @@ def stop(self) -> None: def shutdown(self) -> None: """Ensure profiler is stopped when shutting down.""" - logger.info_once("Shutting down profiler", scope="local") + logger.info_once("Shutting down profiler") if self._running: self.stop() @@ -176,7 +174,6 @@ def __init__( logger.info_once( "Torch profiling enabled. Traces will be saved to: %s", torch_profiler_trace_dir, - scope="local", ) logger.debug( "Profiler config: record_shapes=%s," @@ -216,7 +213,6 @@ def __init__( profiler_config.wait_iterations, profiler_config.warmup_iterations, profiler_config.active_iterations, - scope="local", ) self.profiler = torch.profiler.profile( diff --git a/vllm/utils/deep_gemm.py b/vllm/utils/deep_gemm.py index a2e10ea3951f..637e9ec37e08 100644 --- a/vllm/utils/deep_gemm.py +++ b/vllm/utils/deep_gemm.py @@ -106,16 +106,14 @@ def is_deep_gemm_e8m0_used() -> bool: _lazy_init() if _fp8_gemm_nt_impl is None: - logger.info_once( - "DeepGEMM E8M0 disabled: _fp8_gemm_nt_impl not found", scope="local" - ) + logger.info_once("DeepGEMM E8M0 disabled: _fp8_gemm_nt_impl not found") return False if envs.VLLM_USE_DEEP_GEMM_E8M0: - logger.info_once("DeepGEMM E8M0 enabled on current platform.", scope="local") + logger.info_once("DeepGEMM E8M0 enabled on current platform.") return True - logger.info_once("DeepGEMM E8M0 disabled on current configuration.", scope="local") + logger.info_once("DeepGEMM E8M0 disabled on current configuration.") return False diff --git a/vllm/utils/import_utils.py b/vllm/utils/import_utils.py index 31b63d1e6b49..6cf57c6894ab 100644 --- a/vllm/utils/import_utils.py +++ b/vllm/utils/import_utils.py @@ -66,14 +66,12 @@ def import_triton_kernels(): logger.debug_once( f"Loading module triton_kernels from {triton_kernels.__file__}.", - scope="local", ) elif _has_module("vllm.third_party.triton_kernels"): import vllm.third_party.triton_kernels as triton_kernels logger.debug_once( f"Loading module triton_kernels from {triton_kernels.__file__}.", - scope="local", ) sys.modules["triton_kernels"] = triton_kernels else: diff --git a/vllm/v1/attention/backends/fa_utils.py b/vllm/v1/attention/backends/fa_utils.py index db8cafeb7487..76f98965623d 100644 --- a/vllm/v1/attention/backends/fa_utils.py +++ b/vllm/v1/attention/backends/fa_utils.py @@ -118,7 +118,6 @@ def get_flash_attn_version( logger.warning_once( "Cannot use FA version 4 with batch invariance, " "defaulting to FA version 2.", - scope="local", ) fa_version = 2 diff --git a/vllm/v1/attention/backends/flash_attn.py b/vllm/v1/attention/backends/flash_attn.py index 6af0fa7c4966..4926851903b0 100755 --- a/vllm/v1/attention/backends/flash_attn.py +++ b/vllm/v1/attention/backends/flash_attn.py @@ -637,7 +637,6 @@ def __init__( logger.info_once( "Using FlashAttention version %s", self.vllm_flash_attn_version, - scope="local", ) # Cache the batch invariant result for use in forward passes self.batch_invariant_enabled = envs.VLLM_BATCH_INVARIANT diff --git a/vllm/v1/core/kv_cache_utils.py b/vllm/v1/core/kv_cache_utils.py index 3f6999b82a4d..8f4963fcc873 100644 --- a/vllm/v1/core/kv_cache_utils.py +++ b/vllm/v1/core/kv_cache_utils.py @@ -1334,7 +1334,7 @@ def _report_kv_cache_config( dcp_size, ) num_tokens_str = f"{num_tokens:,}" - logger.info_once("GPU KV cache size: %s tokens", num_tokens_str, scope="local") + logger.info_once("GPU KV cache size: %s tokens", num_tokens_str) max_model_len_str = f"{vllm_config.model_config.max_model_len:,}" max_concurrency = 
get_max_concurrency_for_kv_cache_config( vllm_config, kv_cache_config @@ -1343,7 +1343,6 @@ def _report_kv_cache_config( "Maximum concurrency for %s tokens per request: %.2fx", max_model_len_str, max_concurrency, - scope="local", ) @@ -1445,7 +1444,6 @@ def _auto_fit_max_model_len( "Auto-fit max_model_len: attention-free model, " "using derived max_model_len=%d", original_max, - scope="local", ) return @@ -1472,7 +1470,6 @@ def _auto_fit_max_model_len( "Auto-fit max_model_len: full model context length %d fits in " "available GPU memory", original_max, - scope="local", ) else: # Need to reduce max_model_len to fit in memory @@ -1483,7 +1480,6 @@ def _auto_fit_max_model_len( original_max, auto_fit_max, format_gib(limiting_worker_mem), - scope="local", ) diff --git a/vllm/v1/engine/core.py b/vllm/v1/engine/core.py index c2c1a239adb2..6bf6910cc6f2 100644 --- a/vllm/v1/engine/core.py +++ b/vllm/v1/engine/core.py @@ -293,7 +293,6 @@ def _initialize_kv_caches(self, vllm_config: VllmConfig) -> KVCacheConfig: compile_time + encoder_compile_time, compile_time, encoder_compile_time, - scope="local", ) elif compile_time > 0: logger.info_once( @@ -301,13 +300,11 @@ def _initialize_kv_caches(self, vllm_config: VllmConfig) -> KVCacheConfig: "%.2f s (compilation: %.2f s)", elapsed, compile_time, - scope="local", ) else: logger.info_once( "init engine (profile, create kv cache, warmup model) took %.2f s", elapsed, - scope="local", ) return scheduler_kv_cache_config diff --git a/vllm/v1/executor/multiproc_executor.py b/vllm/v1/executor/multiproc_executor.py index 52969783f091..db21d7cee779 100644 --- a/vllm/v1/executor/multiproc_executor.py +++ b/vllm/v1/executor/multiproc_executor.py @@ -1032,7 +1032,6 @@ def set_multiprocessing_worker_envs(): "external environment to tune this value as needed.", current_parallelism, default_omp_num_threads, - scope="local", ) os.environ["OMP_NUM_THREADS"] = str(default_omp_num_threads) torch.set_num_threads(default_omp_num_threads) diff --git a/vllm/v1/worker/dp_utils.py b/vllm/v1/worker/dp_utils.py index 051fe42155ee..fbc88f81db8b 100644 --- a/vllm/v1/worker/dp_utils.py +++ b/vllm/v1/worker/dp_utils.py @@ -29,7 +29,6 @@ def _get_device_and_group(parallel_config: ParallelConfig): if parallel_config.disable_nccl_for_dp_synchronization: logger.info_once( "Using CPU all reduce to synchronize DP padding between ranks.", - scope="local", ) device = "cpu" group = get_dp_group().cpu_group diff --git a/vllm/v1/worker/gpu/eplb_utils.py b/vllm/v1/worker/gpu/eplb_utils.py index 61d70fafea33..4ffb081ca303 100644 --- a/vllm/v1/worker/gpu/eplb_utils.py +++ b/vllm/v1/worker/gpu/eplb_utils.py @@ -92,9 +92,7 @@ def maybe_register_model( if not is_mixture_of_experts(model): return False - logger.info_once( - "EPLB is enabled for model %s.", model_config.model, scope="local" - ) + logger.info_once("EPLB is enabled for model %s.", model_config.model) assert self.state is not None self.state.add_model(model, model_config) self._has_registered_models = True diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py index b6bc942fc857..386db4fecd4b 100644 --- a/vllm/v1/worker/gpu_model_runner.py +++ b/vllm/v1/worker/gpu_model_runner.py @@ -4855,7 +4855,6 @@ def load_model(self, load_dummy_weights: bool = False) -> None: "Model loading took %s GiB memory and %.6f seconds", format_gib(self.model_memory_usage), time_after_load - time_before_load, - scope="local", ) if not load_dummy_weights: prepare_communication_buffer_for_model(self.model) @@ -4989,7 +4988,7 @@ def 
reload_weights( ) # begin loading weights - logger.info_once("Reloading weights inplace...", scope="local") + logger.info_once("Reloading weights inplace...") if is_checkpoint_format: # load weights from checkpoint/ original model format initialize_layerwise_reload(model) @@ -5001,7 +5000,6 @@ def reload_weights( logger.warning_once( "Reloading with `is_checkpoint_format=True` requires that " "weights be in kernel format and already sharded", - scope="local", ) loaded_weights = set() for name, loaded_weight in weights_iterator: @@ -5015,7 +5013,6 @@ def reload_weights( logger.info_once( "Reloading and processing weights took %.2f seconds", diff_seconds, - scope="local", ) if self.model_config.quantization is None and loaded_weights is not None: weights_not_loaded = weights_to_load - loaded_weights @@ -5802,7 +5799,6 @@ def profile_run(self) -> None: encoder_budget, max_mm_items_per_batch, dummy_modality, - scope="local", ) # Create dummy batch of multimodal inputs. @@ -6099,7 +6095,6 @@ def capture_model(self) -> int: "Graph capturing finished in %.0f secs, took %.2f GiB", elapsed_time, cuda_graph_size / (1 << 30), - scope="local", ) return cuda_graph_size diff --git a/vllm/v1/worker/gpu_worker.py b/vllm/v1/worker/gpu_worker.py index 98f3212bae0d..afbee95c4d7d 100644 --- a/vllm/v1/worker/gpu_worker.py +++ b/vllm/v1/worker/gpu_worker.py @@ -269,7 +269,7 @@ def init_device(self): ) if self.use_v2_model_runner: - logger.info_once("Using V2 Model Runner", scope="local") + logger.info_once("Using V2 Model Runner") # Set random seed. set_random_seed(self.model_config.seed) @@ -440,7 +440,6 @@ def determine_available_memory(self) -> int: logger.info_once( "Available KV cache memory: %s GiB", format_gib(self.available_kv_cache_memory_bytes), - scope="local", ) if cudagraph_memory_estimate > 0:
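
Every hunk above makes the same mechanical change: the `scope="local"` keyword argument is dropped from vLLM's once-per-process logging helpers (`info_once`, `warning_once`, `debug_once`), leaving the message and printf-style arguments untouched. A minimal sketch of the resulting call pattern, assuming the usual `vllm.logger.init_logger` module-level setup (that setup is an assumption; it is not shown in this diff):

    from vllm.logger import init_logger

    # Per-module logger, as used in the files touched above.
    logger = init_logger(__name__)

    # Before this patch the calls carried an extra keyword:
    #     logger.info_once("Using FlashInfer prefill for MLA", scope="local")
    # After this patch the same call relies on the helper's default scope:
    logger.info_once("Using FlashInfer prefill for MLA")

    # Format arguments still pass through unchanged; the value here is
    # hypothetical, purely for illustration.
    elapsed = 1.23
    logger.info_once("Dynamo bytecode transform time: %.2f s", elapsed)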