From e776c995c9dfda675f94badd43fe5ed5816432e9 Mon Sep 17 00:00:00 2001 From: xiazhahe <1479989397@qq.com> Date: Fri, 27 Mar 2026 11:04:24 +0800 Subject: [PATCH 1/4] fix conflict between empty_cache and use_mem_pool --- python/sglang/srt/model_executor/model_runner.py | 2 ++ python/sglang/srt/model_loader/loader.py | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/sglang/srt/model_executor/model_runner.py b/python/sglang/srt/model_executor/model_runner.py index fc9afafac90b..fa6b94f58d59 100644 --- a/python/sglang/srt/model_executor/model_runner.py +++ b/python/sglang/srt/model_executor/model_runner.py @@ -1108,6 +1108,8 @@ def load_model(self): self.remote_instance_transfer_engine_weight_info = ( self.loader.remote_instance_transfer_engine_weight_info ) + if _is_npu: + torch.npu.empty_cache() monkey_patch_vllm_parallel_state(reverse=True) # Publish metadata to ModelExpress if running as seed source diff --git a/python/sglang/srt/model_loader/loader.py b/python/sglang/srt/model_loader/loader.py index 27d189d65622..5998eb0234ab 100644 --- a/python/sglang/srt/model_loader/loader.py +++ b/python/sglang/srt/model_loader/loader.py @@ -707,8 +707,6 @@ def load_weights_and_postprocess(model, weights, target_device): # parameters onto device for processing and back off after. 
with device_loading_context(module, target_device): quant_method.process_weights_after_loading(module) - if _is_npu: - torch.npu.empty_cache() class LayeredModelLoader(DefaultModelLoader): From a9cc31d1f8308974c1033d33e2d398432af70c5b Mon Sep 17 00:00:00 2001 From: xiazhahe <1479989397@qq.com> Date: Sat, 28 Mar 2026 10:32:39 +0800 Subject: [PATCH 2/4] fix conflict between empty_cache and use_mem_pool --- python/sglang/srt/model_executor/model_runner.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/sglang/srt/model_executor/model_runner.py b/python/sglang/srt/model_executor/model_runner.py index fa6b94f58d59..9e1f21a6c5a4 100644 --- a/python/sglang/srt/model_executor/model_runner.py +++ b/python/sglang/srt/model_executor/model_runner.py @@ -1108,6 +1108,8 @@ def load_model(self): self.remote_instance_transfer_engine_weight_info = ( self.loader.remote_instance_transfer_engine_weight_info ) + # Cache needs to be cleared after loading model weights (in the self.loader.load_model function). + # To avoid conflict with memory_saver_adapter.region, empty_cache operation is now moved here. 
if _is_npu: torch.npu.empty_cache() monkey_patch_vllm_parallel_state(reverse=True) From 2a9ca791238d0f2dc30b713689262bd4d9701141 Mon Sep 17 00:00:00 2001 From: xiazhahe <1479989397@qq.com> Date: Sat, 28 Mar 2026 10:38:14 +0800 Subject: [PATCH 3/4] fix conflict between empty_cache and use_mem_pool --- python/sglang/srt/model_executor/model_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/sglang/srt/model_executor/model_runner.py b/python/sglang/srt/model_executor/model_runner.py index 9e1f21a6c5a4..f70fd087ea2e 100644 --- a/python/sglang/srt/model_executor/model_runner.py +++ b/python/sglang/srt/model_executor/model_runner.py @@ -1108,7 +1108,7 @@ def load_model(self): self.remote_instance_transfer_engine_weight_info = ( self.loader.remote_instance_transfer_engine_weight_info ) - # Cache needs to be cleared after loading model weights (in the self.loader.load_model function). + # Cache needs to be cleared after loading model weights (in the self.loader.load_model function). # To avoid conflict with memory_saver_adapter.region, empty_cache operation is now moved here. if _is_npu: torch.npu.empty_cache() From b1aeb3d286dd3be01a70d3aab15b6d13e8eaa413 Mon Sep 17 00:00:00 2001 From: xiazhahe <1479989397@qq.com> Date: Sat, 28 Mar 2026 18:13:30 +0800 Subject: [PATCH 4/4] add ascend as unsupported backend for triton --- python/sglang/srt/utils/common.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/sglang/srt/utils/common.py b/python/sglang/srt/utils/common.py index 57d85eb2f136..076946ed666b 100644 --- a/python/sglang/srt/utils/common.py +++ b/python/sglang/srt/utils/common.py @@ -363,7 +363,7 @@ def get_int_env_var(name: str, default: int = 0) -> int: def support_triton(backend: str) -> bool: - return backend not in ["torch_native", "intel_amx"] + return backend not in ["torch_native", "intel_amx", "ascend"] _ENABLE_TORCH_INFERENCE_MODE = get_bool_env_var(