diff --git a/python/sglang/srt/server_args.py b/python/sglang/srt/server_args.py index 2be1c045a0bf..068a7e33665c 100644 --- a/python/sglang/srt/server_args.py +++ b/python/sglang/srt/server_args.py @@ -1299,6 +1299,9 @@ def _generate_cuda_graph_batch_sizes(self): capture_bs = [bs for bs in capture_bs if bs <= self.cuda_graph_max_bs] + if self.cuda_graph_max_bs not in capture_bs: + capture_bs.append(self.cuda_graph_max_bs) + return capture_bs def _generate_piecewise_cuda_graph_tokens(self):