From af9a3f7ebf5cae0bee83b44b3e4e14a40c8ecce5 Mon Sep 17 00:00:00 2001
From: glenliu21 <glenpwnage@gmail.com>
Date: Mon, 17 Nov 2025 21:40:48 -0500
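Subject: [PATCH 1/2] small lora layers cleanups

- lora/lora.py: in `initialize_weights`, use `get_layer_id` (imported
  from `sglang.srt.layers.utils`) instead of an inline
  `layers\.(\d+)\.` regex search.
- lora/lora.py: remove the adapter-level `self.weights` dict. Note the
  behavior change: loaded weights whose name yields no layer id were
  previously kept in `self.weights`; they are now skipped.
- lora/lora_manager.py: import `get_layer_id` from
  `sglang.srt.layers.utils` rather than `sglang.srt.lora.utils`.
- lora/utils.py: delete the local `get_layer_id` helper.
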
---
 python/sglang/srt/lora/lora.py         | 10 +++-------
 python/sglang/srt/lora/lora_manager.py |  2 +-
 python/sglang/srt/lora/utils.py        | 10 ----------
 3 files changed, 4 insertions(+), 18 deletions(-)

diff --git a/python/sglang/srt/lora/lora.py b/python/sglang/srt/lora/lora.py
index f1199304a269..a6c485e74915 100644
--- a/python/sglang/srt/lora/lora.py
+++ b/python/sglang/srt/lora/lora.py
@@ -26,6 +26,7 @@
 from torch import nn
 
 from sglang.srt.configs.load_config import LoadConfig
+from sglang.srt.layers.utils import get_layer_id
 from sglang.srt.lora.backend.base_backend import BaseLoRABackend
 from sglang.srt.lora.backend.lora_registry import LORA_SUPPORTED_BACKENDS
 from sglang.srt.lora.lora_config import LoRAConfig
@@ -71,8 +72,6 @@ def __init__(
             ]
         )
 
-        self.weights: Dict[str, torch.Tensor] = {}
-
     # initialize the LoRA weights to cpu
     def initialize_weights(self):
         model_path = self.config.path
@@ -83,12 +82,9 @@ def initialize_weights(self):
                 model_path, revision=revision, fall_back_to_pt=True
             )
         ):
-            match = re.search(r"layers\.(\d+)\.", name)
-            if match is not None:
-                layer_id = int(match.group(1))
+            layer_id = get_layer_id(name)
+            if layer_id is not None:
                 self.layers[layer_id].weights[name] = loaded_weight.cpu()
-            else:
-                self.weights[name] = loaded_weight.cpu()
 
         # normalize kv_proj and gate_up_proj
         for layer in self.layers:
diff --git a/python/sglang/srt/lora/lora_manager.py b/python/sglang/srt/lora/lora_manager.py
index 5d0d68d51fcc..1e1a1400de13 100644
--- a/python/sglang/srt/lora/lora_manager.py
+++ b/python/sglang/srt/lora/lora_manager.py
@@ -21,6 +21,7 @@
 import torch
 
 from sglang.srt.configs.load_config import LoadConfig
+from sglang.srt.layers.utils import get_layer_id
 from sglang.srt.lora.backend.base_backend import BaseLoRABackend
 from sglang.srt.lora.backend.lora_registry import get_backend_from_name
 from sglang.srt.lora.layers import BaseLayerWithLoRA, get_lora_layer
@@ -30,7 +31,6 @@
 from sglang.srt.lora.mem_pool import LoRAMemoryPool
 from sglang.srt.lora.utils import (
     LoRAType,
-    get_layer_id,
     get_normalized_target_modules,
     get_target_module_name,
 )
diff --git a/python/sglang/srt/lora/utils.py b/python/sglang/srt/lora/utils.py
index b0ed5bfc4a99..b61fa5e38535 100644
--- a/python/sglang/srt/lora/utils.py
+++ b/python/sglang/srt/lora/utils.py
@@ -46,16 +46,6 @@ class LoRAType(Enum):
     LORA_B = 1
 
 
-def get_layer_id(name: str) -> int:
-    """
-    Extract integer id of layer from its name in string.
-    """
-    match = re.search(r"layers\.(\d+)\.", name)
-    if match is None:
-        return None
-    return int(match.group(1))
-
-
 def get_hidden_dim(
     module_name: str, config: AutoConfig, base_model: torch.nn.Module, layer_idx: int
 ) -> Tuple[int]:

From 89203ef2c8754241806b9aa730ab71e9d8136787 Mon Sep 17 00:00:00 2001
From: glenliu21 <glenpwnage@gmail.com>
Date: Mon, 17 Nov 2025 21:44:27 -0500
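Subject: [PATCH 2/2] precommit fixes

- examples/sagemaker/deploy_and_serve_endpoint.py: formatting-only
  changes to assignments and the `Model(...)` call (spaces around `=`,
  4-space indentation, trailing blank line removed), plus import
  regrouping and removal of the bare `import sagemaker`.
- lora/lora.py, lora/utils.py: drop `import re`, following the removal
  of the regex-based layer-id parsing in patch 1/2.
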
---
 .../sagemaker/deploy_and_serve_endpoint.py    | 30 +++++++++----------
 python/sglang/srt/lora/lora.py                |  1 -
 python/sglang/srt/lora/utils.py               |  1 -
 3 files changed, 15 insertions(+), 17 deletions(-)

diff --git a/examples/sagemaker/deploy_and_serve_endpoint.py b/examples/sagemaker/deploy_and_serve_endpoint.py
index afc4cc1fc66b..e518183c39f3 100644
--- a/examples/sagemaker/deploy_and_serve_endpoint.py
+++ b/examples/sagemaker/deploy_and_serve_endpoint.py
@@ -1,7 +1,6 @@
 import json
-import boto3
-import sagemaker
 
+import boto3
 from sagemaker import serializers
 from sagemaker.model import Model
 from sagemaker.predictor import Predictor
@@ -10,20 +9,22 @@
 sm_client = boto_session.client("sagemaker")
 sm_role = boto_session.resource("iam").Role("SageMakerRole").arn
 
-endpoint_name="<YOUR_ENDPOINT_NAME>"
-image_uri="<YOUR_DOCKER_IMAGE_URI>"
-model_id="<YOUR_MODEL_ID>" # eg: Qwen/Qwen3-0.6B from https://huggingface.co/Qwen/Qwen3-0.6B
-hf_token="<YOUR_HUGGINGFACE_TOKEN>"
-prompt="<YOUR_ENDPOINT_PROMPT>"
+endpoint_name = "<YOUR_ENDPOINT_NAME>"
+image_uri = "<YOUR_DOCKER_IMAGE_URI>"
+model_id = (
+    "<YOUR_MODEL_ID>"  # eg: Qwen/Qwen3-0.6B from https://huggingface.co/Qwen/Qwen3-0.6B
+)
+hf_token = "<YOUR_HUGGINGFACE_TOKEN>"
+prompt = "<YOUR_ENDPOINT_PROMPT>"
 
 model = Model(
-  name=endpoint_name,
-  image_uri=image_uri,
-  role=sm_role,
-  env={
-      "SM_SGLANG_MODEL_PATH": model_id,
-      "HF_TOKEN": hf_token,
-  },
+    name=endpoint_name,
+    image_uri=image_uri,
+    role=sm_role,
+    env={
+        "SM_SGLANG_MODEL_PATH": model_id,
+        "HF_TOKEN": hf_token,
+    },
 )
 print("Model created successfully")
 print("Starting endpoint deployment (this may take 10-15 minutes)...")
@@ -66,4 +67,3 @@
         print("Warning: Response is not valid JSON. Returning as string.")
 
 print(f"Received model response: '{response}'")
-
diff --git a/python/sglang/srt/lora/lora.py b/python/sglang/srt/lora/lora.py
index a6c485e74915..995aca6e5e36 100644
--- a/python/sglang/srt/lora/lora.py
+++ b/python/sglang/srt/lora/lora.py
@@ -19,7 +19,6 @@
 # https://github.com/vllm-project/vllm/blob/4abf6336ec65c270343eb895e7b18786e9274176/vllm/lora/layers.py
 
 import logging
-import re
 from typing import Dict, List
 
 import torch
diff --git a/python/sglang/srt/lora/utils.py b/python/sglang/srt/lora/utils.py
index b61fa5e38535..48a450d9b468 100644
--- a/python/sglang/srt/lora/utils.py
+++ b/python/sglang/srt/lora/utils.py
@@ -1,4 +1,3 @@
-import re
 from dataclasses import dataclass
 from enum import Enum
 from typing import Iterable, Optional, Set, Tuple