From af9a3f7ebf5cae0bee83b44b3e4e14a40c8ecce5 Mon Sep 17 00:00:00 2001 From: glenliu21 Date: Mon, 17 Nov 2025 21:40:48 -0500 Subject: [PATCH 1/2] small lora layers cleanups --- python/sglang/srt/lora/lora.py | 10 +++------- python/sglang/srt/lora/lora_manager.py | 2 +- python/sglang/srt/lora/utils.py | 10 ---------- 3 files changed, 4 insertions(+), 18 deletions(-) diff --git a/python/sglang/srt/lora/lora.py b/python/sglang/srt/lora/lora.py index f1199304a269..a6c485e74915 100644 --- a/python/sglang/srt/lora/lora.py +++ b/python/sglang/srt/lora/lora.py @@ -26,6 +26,7 @@ from torch import nn from sglang.srt.configs.load_config import LoadConfig +from sglang.srt.layers.utils import get_layer_id from sglang.srt.lora.backend.base_backend import BaseLoRABackend from sglang.srt.lora.backend.lora_registry import LORA_SUPPORTED_BACKENDS from sglang.srt.lora.lora_config import LoRAConfig @@ -71,8 +72,6 @@ def __init__( ] ) - self.weights: Dict[str, torch.Tensor] = {} - # initialize the LoRA weights to cpu def initialize_weights(self): model_path = self.config.path @@ -83,12 +82,9 @@ def initialize_weights(self): model_path, revision=revision, fall_back_to_pt=True ) ): - match = re.search(r"layers\.(\d+)\.", name) - if match is not None: - layer_id = int(match.group(1)) + layer_id = get_layer_id(name) + if layer_id is not None: self.layers[layer_id].weights[name] = loaded_weight.cpu() - else: - self.weights[name] = loaded_weight.cpu() # normalize kv_proj and gate_up_proj for layer in self.layers: diff --git a/python/sglang/srt/lora/lora_manager.py b/python/sglang/srt/lora/lora_manager.py index 5d0d68d51fcc..1e1a1400de13 100644 --- a/python/sglang/srt/lora/lora_manager.py +++ b/python/sglang/srt/lora/lora_manager.py @@ -21,6 +21,7 @@ import torch from sglang.srt.configs.load_config import LoadConfig +from sglang.srt.layers.utils import get_layer_id from sglang.srt.lora.backend.base_backend import BaseLoRABackend from sglang.srt.lora.backend.lora_registry import get_backend_from_name from sglang.srt.lora.layers import BaseLayerWithLoRA, get_lora_layer @@ -30,7 +31,6 @@ from sglang.srt.lora.mem_pool import LoRAMemoryPool from sglang.srt.lora.utils import ( LoRAType, - get_layer_id, get_normalized_target_modules, get_target_module_name, ) diff --git a/python/sglang/srt/lora/utils.py b/python/sglang/srt/lora/utils.py index b0ed5bfc4a99..b61fa5e38535 100644 --- a/python/sglang/srt/lora/utils.py +++ b/python/sglang/srt/lora/utils.py @@ -46,16 +46,6 @@ class LoRAType(Enum): LORA_B = 1 -def get_layer_id(name: str) -> int: - """ - Extract integer id of layer from its name in string. - """ - match = re.search(r"layers\.(\d+)\.", name) - if match is None: - return None - return int(match.group(1)) - - def get_hidden_dim( module_name: str, config: AutoConfig, base_model: torch.nn.Module, layer_idx: int ) -> Tuple[int]: From 89203ef2c8754241806b9aa730ab71e9d8136787 Mon Sep 17 00:00:00 2001 From: glenliu21 Date: Mon, 17 Nov 2025 21:44:27 -0500 Subject: [PATCH 2/2] precommit fixes --- .../sagemaker/deploy_and_serve_endpoint.py | 30 +++++++++---------- python/sglang/srt/lora/lora.py | 1 - python/sglang/srt/lora/utils.py | 1 - 3 files changed, 15 insertions(+), 17 deletions(-) diff --git a/examples/sagemaker/deploy_and_serve_endpoint.py b/examples/sagemaker/deploy_and_serve_endpoint.py index afc4cc1fc66b..e518183c39f3 100644 --- a/examples/sagemaker/deploy_and_serve_endpoint.py +++ b/examples/sagemaker/deploy_and_serve_endpoint.py @@ -1,7 +1,6 @@ import json -import boto3 -import sagemaker +import boto3 from sagemaker import serializers from sagemaker.model import Model from sagemaker.predictor import Predictor @@ -10,20 +9,22 @@ sm_client = boto_session.client("sagemaker") sm_role = boto_session.resource("iam").Role("SageMakerRole").arn -endpoint_name="" -image_uri="" -model_id="" # eg: Qwen/Qwen3-0.6B from https://huggingface.co/Qwen/Qwen3-0.6B -hf_token="" -prompt="" +endpoint_name = "" +image_uri = "" +model_id = ( + "" # eg: Qwen/Qwen3-0.6B from https://huggingface.co/Qwen/Qwen3-0.6B +) +hf_token = "" +prompt = "" model = Model( - name=endpoint_name, - image_uri=image_uri, - role=sm_role, - env={ - "SM_SGLANG_MODEL_PATH": model_id, - "HF_TOKEN": hf_token, - }, + name=endpoint_name, + image_uri=image_uri, + role=sm_role, + env={ + "SM_SGLANG_MODEL_PATH": model_id, + "HF_TOKEN": hf_token, + }, ) print("Model created successfully") print("Starting endpoint deployment (this may take 10-15 minutes)...") @@ -66,4 +67,3 @@ print("Warning: Response is not valid JSON. Returning as string.") print(f"Received model response: '{response}'") - diff --git a/python/sglang/srt/lora/lora.py b/python/sglang/srt/lora/lora.py index a6c485e74915..995aca6e5e36 100644 --- a/python/sglang/srt/lora/lora.py +++ b/python/sglang/srt/lora/lora.py @@ -19,7 +19,6 @@ # https://github.com/vllm-project/vllm/blob/4abf6336ec65c270343eb895e7b18786e9274176/vllm/lora/layers.py import logging -import re from typing import Dict, List import torch diff --git a/python/sglang/srt/lora/utils.py b/python/sglang/srt/lora/utils.py index b61fa5e38535..48a450d9b468 100644 --- a/python/sglang/srt/lora/utils.py +++ b/python/sglang/srt/lora/utils.py @@ -1,4 +1,3 @@ -import re from dataclasses import dataclass from enum import Enum from typing import Iterable, Optional, Set, Tuple