sgl-project · merrymercy · May 16, 2025 · May 8, 2025 · May 12, 2025 · May 14, 2025
@@ -269,6 +269,7 @@ def _maybe_prepare_dp_attn_batch(batch: ScheduleBatch, model_runner):
             batch,
             dp_size=model_runner.server_args.dp_size,
             attn_tp_size=1,
+            moe_dense_tp_size=model_runner.server_args.moe_dense_tp_size,
             tp_cpu_group=model_runner.tp_group.cpu_group,
             get_idle_batch=None,
             disable_cuda_graph=model_runner.server_args.disable_cuda_graph,

@@ -142,16 +142,6 @@ def get_local_attention_dp_size():
     return _LOCAL_ATTN_DP_SIZE
 
 
-def get_local_attention_dp_rank():
-    assert _LOCAL_ATTN_DP_RANK is not None, "dp attention not initialized!"
-    return _LOCAL_ATTN_DP_RANK
-
-
-def get_local_attention_dp_size():
-    assert _LOCAL_ATTN_DP_SIZE is not None, "dp attention not initialized!"
-    return _LOCAL_ATTN_DP_SIZE
-
-
 @contextmanager
 def disable_dp_size():
     """Patch the tp group temporarily until this function ends.