diff --git a/python/sglang/srt/managers/configure_logging.py b/python/sglang/srt/managers/configure_logging.py
index 0dc78edfa075..7f268e31b7b0 100644
--- a/python/sglang/srt/managers/configure_logging.py
+++ b/python/sglang/srt/managers/configure_logging.py
@@ -33,15 +33,30 @@
         "--dump-requests-folder", type=str, default="/tmp/sglang_request_dump"
     )
     parser.add_argument("--dump-requests-threshold", type=int, default=1000)
+    parser.add_argument(
+        "--dump-requests-exclude-meta-keys",
+        type=str,
+        default=None,
+        help=(
+            "Comma-separated meta_info keys to strip from each dumped request "
+            "(e.g. 'routed_experts,hidden_states'). Pass an empty string to "
+            "keep all keys. If not set, the server default is used."
+        ),
+    )
     args = parser.parse_args()
 
-    response = requests.post(
-        args.url + "/configure_logging",
-        json={
-            "log_requests": args.log_requests,
-            "log_requests_level": args.log_requests_level,  # Log full requests
-            "dump_requests_folder": args.dump_requests_folder,
-            "dump_requests_threshold": args.dump_requests_threshold,
-        },
-    )
+    payload = {
+        "log_requests": args.log_requests,
+        "log_requests_level": args.log_requests_level,  # Log full requests
+        "dump_requests_folder": args.dump_requests_folder,
+        "dump_requests_threshold": args.dump_requests_threshold,
+    }
+    if args.dump_requests_exclude_meta_keys is not None:
+        payload["dump_requests_exclude_meta_keys"] = [
+            k.strip()
+            for k in args.dump_requests_exclude_meta_keys.split(",")
+            if k.strip()
+        ]
+
+    response = requests.post(args.url + "/configure_logging", json=payload)
     assert response.status_code == 200
diff --git a/python/sglang/srt/managers/io_struct.py b/python/sglang/srt/managers/io_struct.py
index 005079801f70..2d61a4a49d48 100644
--- a/python/sglang/srt/managers/io_struct.py
+++ b/python/sglang/srt/managers/io_struct.py
@@ -1758,6 +1758,7 @@ class ConfigureLoggingReq(BaseReq):
     dump_requests_folder: Optional[str] = None
     dump_requests_threshold: Optional[int] = None
     crash_dump_folder: Optional[str] = None
+    dump_requests_exclude_meta_keys: Optional[List[str]] = None
 
 
 @dataclass
diff --git a/python/sglang/srt/managers/tokenizer_manager.py b/python/sglang/srt/managers/tokenizer_manager.py
index c3b4005abe90..1eea93d2eb65 100644
--- a/python/sglang/srt/managers/tokenizer_manager.py
+++ b/python/sglang/srt/managers/tokenizer_manager.py
@@ -391,6 +391,10 @@ def init_request_logging_and_dumping(self):
         # Dumping
         self.dump_requests_folder = ""  # By default do not dump
         self.dump_requests_threshold = 1000
+        self.dump_requests_exclude_meta_keys: List[str] = [
+            "routed_experts",
+            "hidden_states",
+        ]
         self.dump_request_list: List[Tuple] = []
         self.crash_dump_request_list: deque[Tuple] = deque()
         self.crash_dump_performed = False  # Flag to ensure dump is only called once
@@ -1577,6 +1581,10 @@ def configure_logging(self, obj: ConfigureLoggingReq):
             self.dump_requests_folder = obj.dump_requests_folder
         if obj.dump_requests_threshold is not None:
             self.dump_requests_threshold = obj.dump_requests_threshold
+        if obj.dump_requests_exclude_meta_keys is not None:
+            self.dump_requests_exclude_meta_keys = list(
+                obj.dump_requests_exclude_meta_keys
+            )
         if obj.crash_dump_folder is not None:
             self.crash_dump_folder = obj.crash_dump_folder
         logging.info(f"Config logging: {obj=}")
@@ -2193,6 +2201,16 @@ def collect_metrics(self, state: ReqState, recv_obj: BatchStrOutput, i: int):
             )
 
     def dump_requests(self, state: ReqState, out_dict: dict):
+        if self.dump_requests_exclude_meta_keys and isinstance(
+            out_dict.get("meta_info"), dict
+        ):
+            exclude = self.dump_requests_exclude_meta_keys
+            if any(k in out_dict["meta_info"] for k in exclude):
+                filtered_meta = {
+                    k: v for k, v in out_dict["meta_info"].items() if k not in exclude
+                }
+                out_dict = {**out_dict, "meta_info": filtered_meta}
+
         self.dump_request_list.append(
             (
                 state.obj,
@@ -2243,7 +2261,20 @@ def _dump_data_to_file(
         def background_task():
             os.makedirs(os.path.dirname(filename), exist_ok=True)
             with open(filename, "wb") as f:
-                pickle.dump(to_dump_with_server_args, f)
+                try:
+                    pickle.dump(to_dump_with_server_args, f)
+                except Exception as e:
+                    # When the server is launched with --trust-remote-code,
+                    # server_args sometimes fails to pickle. Retry without
+                    # server_args so the request data still gets persisted.
+                    logger.error(
+                        f"Failed to pickle dump with server_args: {e!r}; "
+                        "retrying without server_args"
+                    )
+                    f.seek(0)
+                    f.truncate()
+                    to_dump_with_server_args["server_args"] = None
+                    pickle.dump(to_dump_with_server_args, f)
 
         asyncio.create_task(asyncio.to_thread(background_task))
 
@@ -2306,7 +2337,20 @@ def dump_requests_before_crash(
             "launch_command": " ".join(sys.argv),
         }
         with open(filename, "wb") as f:
-            pickle.dump(data_to_dump_with_server_args, f)
+            try:
+                pickle.dump(data_to_dump_with_server_args, f)
+            except Exception as e:
+                # When the server is launched with --trust-remote-code,
+                # server_args sometimes fails to pickle. Retry without
+                # server_args so the request data still gets persisted.
+                logger.error(
+                    f"Failed to pickle dump with server_args: {e!r}; "
+                    "retrying without server_args"
+                )
+                f.seek(0)
+                f.truncate()
+                data_to_dump_with_server_args["server_args"] = None
+                pickle.dump(data_to_dump_with_server_args, f)
         logger.error(
             f"Dumped {len(self.crash_dump_request_list)} finished and {len(unfinished_requests)} unfinished requests before crash to {filename}"
         )