This repository has been archived by the owner on Nov 17, 2023. It is now read-only.

[MXNET-1111] Horovod support for MXNet #12666

Merged: 11 commits, Oct 29, 2018
Changes from 9 commits
2 changes: 1 addition & 1 deletion python/mxnet/module/base_module.py
@@ -625,7 +625,7 @@ def output_shapes(self):
################################################################################
# Parameters of a module
################################################################################
def get_params(self):
def get_params(self, copy_to_cpu=True):
Member:

For all the places where you added this new param, could you please update the function comments to explain what copy_to_cpu is for, like you did in executor_group.py? It may also be good to add some explanation of why it is not used here and where it is going to be used; readers might otherwise be confused about why the param is in the function signature but not used in the implementation.
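A minimal sketch of the kind of docstring addition being asked for here; the wording is illustrative, not taken from the PR:

```python
def get_params(self, copy_to_cpu=True):
    """Gets parameters, which are potentially copies of the actual parameters
    used to do computation on the device.

    Parameters
    ----------
    copy_to_cpu : bool
        Whether to copy the parameters to CPU memory (default True).
        Not used in this base class; concrete modules such as `Module`
        forward it down to `DataParallelExecutorGroup.get_params`, where
        setting it to False keeps the averaged parameters on the device
        and avoids a device-to-CPU copy (useful for Horovod-style allreduce).
    """
    raise NotImplementedError()
```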

Member:

As we discussed offline, based on our tests of distributed training on 1-node and 2-node p3.16xlarge instances, the training accuracy looks normal. We also saved and compared the module parameters from one rank on each node in the 2-node scenario, and they look identical. I think we can remove this copy_to_cpu argument change.
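For reference, a minimal sketch of the kind of cross-rank comparison described above, assuming each rank has already dumped its parameter dict with mx.nd.save; the file names are hypothetical:

```python
import mxnet as mx

# hypothetical dumps written by one rank on each node after training
params_a = mx.nd.load('params_node1_rank0.params')
params_b = mx.nd.load('params_node2_rank0.params')

assert params_a.keys() == params_b.keys()
for name in sorted(params_a):
    identical = (params_a[name] == params_b[name]).min().asscalar() == 1.0
    print(name, 'identical' if identical else 'DIFFERS')
```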

"""Gets parameters, those are potentially copies of the the actual parameters used
to do computation on the device.

2 changes: 1 addition & 1 deletion python/mxnet/module/bucketing_module.py
@@ -161,7 +161,7 @@ def output_shapes(self):
assert self.binded
return self._curr_module.output_shapes

def get_params(self):
def get_params(self, copy_to_cpu=True):
Contributor:

Why is this copy_to_cpu argument needed here, since it is not used? Nor is it declared in BaseModule.

Member:
Please check Line 174 in this file: params = self._curr_module.get_params(). Do we need to change to params = self._curr_module.get_params(copy_to_cpu)?
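If the argument stays, the internal call the reviewer points at would presumably have to forward it. A minimal sketch of that forwarding, with the surrounding method body simplified (not the PR's actual code):

```python
# inside BucketingModule (simplified sketch)
def get_params(self, copy_to_cpu=True):
    assert self.binded and self.params_initialized
    # forward the flag so the currently bound module decides where the
    # averaged parameters end up (CPU vs. the devices themselves)
    params = self._curr_module.get_params(copy_to_cpu)
    self._params_dirty = False
    return params
```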

"""Gets current parameters.

Returns
18 changes: 15 additions & 3 deletions python/mxnet/module/executor_group.py
@@ -412,7 +412,7 @@ def set_params(self, arg_params, aux_params, allow_extra=False):
for exec_ in self.execs:
exec_.copy_params_from(arg_params, aux_params, allow_extra_params=allow_extra)

def get_params(self, arg_params, aux_params):
def get_params(self, arg_params, aux_params, copy_to_cpu):
""" Copy data from each executor to `arg_params` and `aux_params`.

Parameters
@@ -421,17 +421,29 @@ def get_params(self, arg_params, aux_params):
Target parameter arrays.
aux_params : list of NDArray
Target aux arrays.
copy_to_cpu : boolean
Whether or not to copy parameters to CPU (defaults to True).

Notes
-----
- This function will inplace update the NDArrays in arg_params and aux_params.
"""
for name, block in zip(self.param_names, self.param_arrays):
weight = sum(w.copyto(ctx.cpu()) for w in block) / len(block)
if copy_to_cpu:
context = ctx.cpu()
else:
context = block[0].context
Contributor:

It seems this else block is identical to the if block except for the context. Can you fold the if check into the context assignment, so that context just switches between block[0].context and ctx.cpu()?

Contributor (Author):
Fixed
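Presumably the consolidation looks something like the helper below: the if/else collapses into choosing the target context once, and the averaging is written a single time (a standalone sketch with illustrative names, not the committed code):

```python
import mxnet as mx

def _average_block(block, copy_to_cpu=True):
    """Average one parameter's per-device copies on a single target context."""
    context = mx.cpu() if copy_to_cpu else block[0].context
    return sum(w.copyto(context) for w in block) / len(block)

# toy usage: two copies of the same parameter (CPU stands in for two GPUs here)
block = [mx.nd.ones((2, 2)), mx.nd.ones((2, 2)) * 3]
print(_average_block(block, copy_to_cpu=True))   # averaged on mx.cpu()
print(_average_block(block, copy_to_cpu=False))  # averaged on block[0].context
```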

weight = sum(w.copyto(context) for w in block) / len(block)
weight.astype(arg_params[name].dtype).copyto(arg_params[name])
arg_params[name] = arg_params[name].as_in_context(context)
for name, block in zip(self.aux_names, self.aux_arrays):
weight = sum(w.copyto(ctx.cpu()) for w in block) / len(block)
if copy_to_cpu:
context = ctx.cpu()
else:
context = block[0].context
weight = sum(w.copyto(context) for w in block) / len(block)
weight.astype(aux_params[name].dtype).copyto(aux_params[name])
aux_params[name] = aux_params[name].as_in_context(context)

def forward(self, data_batch, is_train=None):
"""Split `data_batch` according to workload and run forward on each devices.
12 changes: 6 additions & 6 deletions python/mxnet/module/module.py
@@ -242,7 +242,7 @@ def output_shapes(self):
assert self.binded
return self._exec_group.get_output_shapes()

def get_params(self):
def get_params(self, copy_to_cpu=True):
"""Gets current parameters.

Returns
@@ -252,8 +252,8 @@ def get_params(self):
"""
assert self.binded and self.params_initialized

if self._params_dirty:
self._sync_params_from_devices()
if not copy_to_cpu or self._params_dirty:
self._sync_params_from_devices(copy_to_cpu)
return (self._arg_params, self._aux_params)

def init_params(self, initializer=Uniform(0.01), arg_params=None, aux_params=None,
@@ -495,7 +495,7 @@ def init_optimizer(self, kvstore='local', optimizer='sgd',
return

if self._params_dirty:
self._sync_params_from_devices()
self._sync_params_from_devices(copy_to_cpu=True)

(kvstore, update_on_kvstore) = \
_create_kvstore(kvstore, len(self._context), self._arg_params)
@@ -772,15 +772,15 @@ def update_metric(self, eval_metric, labels, pre_sliced=False):
"""
self._exec_group.update_metric(eval_metric, labels, pre_sliced)

def _sync_params_from_devices(self):
def _sync_params_from_devices(self, copy_to_cpu):
"""Synchronizes parameters from devices to CPU. This function should be called after
calling `update` that updates the parameters on the devices, before one can read the
latest parameters from ``self._arg_params`` and ``self._aux_params``.

For row_sparse parameters on devices, they are pulled from KVStore with all row ids.

"""
self._exec_group.get_params(self._arg_params, self._aux_params)
self._exec_group.get_params(self._arg_params, self._aux_params, copy_to_cpu)
if self._kvstore and self._update_on_kvstore:
for param_name, param_val in sorted(self._arg_params.items()):
if param_val.stype == 'row_sparse':
2 changes: 1 addition & 1 deletion python/mxnet/module/python_module.py
@@ -93,7 +93,7 @@ def output_shapes(self):
################################################################################
# Parameters of a module
################################################################################
def get_params(self):
def get_params(self, copy_to_cpu=True):
Contributor:
Why is this argument needed here?

"""Gets parameters, those are potentially copies of the the actual parameters used
to do computation on the device. Subclass should override this method if contains
parameters.
2 changes: 1 addition & 1 deletion python/mxnet/module/sequential_module.py
@@ -149,7 +149,7 @@ def output_shapes(self):
assert self.binded
return self._modules[-1].output_shapes

def get_params(self):
def get_params(self, copy_to_cpu=True):
Contributor:
Why is this argument needed here?

Member:
Please check Line 167 in this file: arg, aux = module.get_params(). Do we need to pass in the copy_to_cpu value when calling get_params? arg, aux = module.get_params(copy_to_cpu)?
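If the argument is kept, the loop at that line would presumably forward it as well; a simplified sketch of SequentialModule.get_params with the forwarding in place (illustrative, not the PR's actual body):

```python
# inside SequentialModule (simplified sketch)
def get_params(self, copy_to_cpu=True):
    assert self.binded and self.params_initialized
    arg_params = dict()
    aux_params = dict()
    for module in self._modules:
        arg, aux = module.get_params(copy_to_cpu)  # was: module.get_params()
        arg_params.update(arg)
        aux_params.update(aux)
    return (arg_params, aux_params)
```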

"""Gets current parameters.

Returns
4 changes: 2 additions & 2 deletions src/io/iter_image_recordio_2.cc
@@ -285,9 +285,9 @@ inline bool ImageRecordIOParser2<DType>::ParseNext(DataBatch *out) {
shape_vec.push_back(param_.label_width);
TShape label_shape(shape_vec.begin(), shape_vec.end());

out->data.at(0) = NDArray(data_shape, Context::CPUPinned(0), false,
out->data.at(0) = NDArray(data_shape, Context::CPU(0), false,
Member:

For my own understanding, why do we change from CPUPinned to CPU in this PR? Is it going to cause issues if we stick with CPUPinned when conducting distributed training with Horovod?

Contributor (Author) @ctcyang, Oct 23, 2018:
In my benchmark, it doesn't affect performance.

You may recall we can't use horovod.local_rank() to determine the GPU id, because MXNet cannot have a compile-time dependency on Horovod. But if all GPUs 0-7 then use CPUPinned(0), you get 8 processes starting the CUDA driver on GPU 0, which wastes memory and keeps you from using the largest batch size.
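For context, this is roughly how a Horovod-MXNet training script picks its device per process with the horovod.mxnet API (a minimal sketch; the point is that only the Python script, not libmxnet, knows the local rank):

```python
import horovod.mxnet as hvd
import mxnet as mx

hvd.init()
# one training process per GPU: each process binds to its own local GPU
context = mx.gpu(hvd.local_rank())
print('rank %d of %d using %s' % (hvd.rank(), hvd.size(), context))
```

Since the C++ iterator is built without that rank information, it cannot choose CPUPinned(local_rank()) itself, hence the hard-coded CPUPinned(0) that this change replaces with CPU(0).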

Member:
@ctcyang what did you benchmark? Did you also run kvstore('local') and kvstore('nccl') to verify the perf impact?

Contributor (Author) @ctcyang, Oct 27, 2018:
In my benchmark, I ran kvstore('nccl') and kvstore('device'), and there was no perf difference before and after the change. I did not test kvstore('local').

Member:
I'm afraid the data copy will be more likely to become the bottleneck as GPUs get faster in the next generation. What about adding a ctx option, like https://mxnet.incubator.apache.org/versions/master/api/python/ndarray/ndarray.html?highlight=logis#mxnet.ndarray.zeros, which defaults to cpu_pinned(0)? Horovod users who run into memory issues could then pass either ctx=mx.cpu() or mx.cpu_pinned(local_rank()).
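A sketch of what the suggested option could look like from the user's side. Note the ctx keyword on the iterator is hypothetical; it does not exist in this PR and is shown only to illustrate the suggestion:

```python
import horovod.mxnet as hvd
import mxnet as mx

hvd.init()

# HYPOTHETICAL ctx argument, as suggested above; today the parser's
# context is fixed in C++ rather than exposed to the user.
train_iter = mx.io.ImageRecordIter(
    path_imgrec='train.rec',                          # illustrative path
    data_shape=(3, 224, 224),
    batch_size=128,
    ctx=mx.Context('cpu_pinned', hvd.local_rank()),   # or mx.cpu() to save memory
)
```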

mshadow::DataType<DType>::kFlag);
out->data.at(1) = NDArray(label_shape, Context::CPUPinned(0), false,
out->data.at(1) = NDArray(label_shape, Context::CPU(0), false,
mshadow::DataType<real_t>::kFlag);
unit_size_[0] = param_.data_shape.Size();
unit_size_[1] = param_.label_width;
2 changes: 1 addition & 1 deletion src/kvstore/comm.h
@@ -43,7 +43,7 @@ namespace kvstore {
class Comm {
public:
Comm() {
pinned_ctx_ = Context::CPUPinned(0);
pinned_ctx_ = Context::CPU(0);
}
virtual ~Comm() { }
/**
2 changes: 1 addition & 1 deletion src/kvstore/kvstore_nccl.h
@@ -64,7 +64,7 @@ class KVStoreNCCL : public KVStoreLocal {
KVStoreNCCL() : KVStoreLocal() {
// Due to aggregation, we do not use the Comm interface
comm_ = nullptr;
pinned_ctx_ = Context::CPUPinned(0);
pinned_ctx_ = Context::CPU(0);
Member:

cc @ptrendx @DickJC123: is this OK for NCCL?

Contributor (Author) @ctcyang, Oct 27, 2018:
The changes in src/kvstore/kvstore_nccl.h and src/kvstore/comm.h are artifacts of the PoC API. In the final API the user sets --kv-store None, so this code path won't be used and these changes can be reverted.

inited_ = false;
}
