From 8d552017e9b7adefa0e3988c24e2ae6cfa0d8870 Mon Sep 17 00:00:00 2001
From: Olatunji Ruwase
Date: Fri, 16 Apr 2021 14:16:25 +0000
Subject: [PATCH 1/2] Fix UnboundLocalError

---
 deepspeed/runtime/zero/partition_parameters.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/deepspeed/runtime/zero/partition_parameters.py b/deepspeed/runtime/zero/partition_parameters.py
index 0f2741251bb6..24c9c02e6096 100755
--- a/deepspeed/runtime/zero/partition_parameters.py
+++ b/deepspeed/runtime/zero/partition_parameters.py
@@ -852,7 +852,7 @@ def _partition_gradient(self, param, partition_buffer=None, accumulate=False):
             # elements))
 
         #print("after partition gradients")
-        param.grad.data = dest_tensor_full_buffer.data
+        param.grad.data = dest_tensor.data
         see_memory_usage("After partitioning gradients", force=False)
 
 

From 8e89a829843db1f3c1fda608db3539ae55b1195d Mon Sep 17 00:00:00 2001
From: Olatunji Ruwase
Date: Fri, 16 Apr 2021 20:23:54 +0000
Subject: [PATCH 2/2] Get full partition size

---
 deepspeed/runtime/zero/partition_parameters.py | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/deepspeed/runtime/zero/partition_parameters.py b/deepspeed/runtime/zero/partition_parameters.py
index 24c9c02e6096..42cdcd645f4d 100755
--- a/deepspeed/runtime/zero/partition_parameters.py
+++ b/deepspeed/runtime/zero/partition_parameters.py
@@ -808,17 +808,12 @@ def _partition_gradient(self, param, partition_buffer=None, accumulate=False):
         start = partition_size * rank
         end = start + partition_size
 
-        dest_tensor = partition_buffer.view(-1).narrow(0, 0, partition_size)
+        dest_tensor_full_buffer = partition_buffer.view(-1).narrow(0, 0, partition_size)
 
         #print("before partition gradients")
         if start < param.ds_numel:
             elements = min(param.ds_numel - start, partition_size)
 
-            dest_tensor_full_buffer = partition_buffer.view(-1).narrow(
-                0,
-                0,
-                partition_size)
-
             dest_tensor = dest_tensor_full_buffer.narrow(0, 0, elements)
             src_tensor = param.grad.view(-1).narrow(0, start, elements)
 
@@ -852,7 +847,7 @@ def _partition_gradient(self, param, partition_buffer=None, accumulate=False):
             # elements))
 
         #print("after partition gradients")
-        param.grad.data = dest_tensor.data
+        param.grad.data = dest_tensor_full_buffer.data
         see_memory_usage("After partitioning gradients", force=False)
 
 