From fe04432b26252cc4960ba8f056c84452b985c058 Mon Sep 17 00:00:00 2001
From: Leonard Lausen
Date: Mon, 10 Aug 2020 12:42:39 -0700
Subject: [PATCH] Revert "drop list support for gluon trainer (#18877)"

This reverts commit d5fdcbf3bae3ea85cb89394147db4b1da0105fb4.
---
 python/mxnet/gluon/trainer.py               | 36 ++++++++--------------
 tests/nightly/dist_async_kvstore.py         |  2 +-
 tests/nightly/dist_device_sync_kvstore.py   |  2 +-
 tests/nightly/dist_sync_kvstore.py          |  6 ++--
 tests/python/unittest/test_gluon.py         |  4 +--
 tests/python/unittest/test_gluon_trainer.py | 24 +++++++-------
 6 files changed, 32 insertions(+), 42 deletions(-)

diff --git a/python/mxnet/gluon/trainer.py b/python/mxnet/gluon/trainer.py
index 7f1a7d09e585..0b947dd7c8d4 100644
--- a/python/mxnet/gluon/trainer.py
+++ b/python/mxnet/gluon/trainer.py
@@ -20,7 +20,6 @@
 """Parameter optimizer."""
 __all__ = ['Trainer']
 
-import sys
 from collections import OrderedDict
 
 from .. import optimizer as opt
@@ -78,34 +77,25 @@ class Trainer(object):
     """
     def __init__(self, params, optimizer, optimizer_params=None, kvstore='device',
                  compression_params=None, update_on_kvstore=None):
-        self._param2name = {}
-        self._param2idx = {}
-        py_version = sys.version_info
-        assert isinstance(params, (dict, OrderedDict)), \
-            'invalid params type: {}. Expected dict type'.format(type(params))
-        names = list(params.keys())
         param_list = []
-        # only python 3.5 requires sorting
-        if py_version[0] == 3 and py_version[1] == 5:
-            names = sorted(names)
-        for name in names:
-            p = params[name]
-            if not isinstance(p, Parameter):
-                raise ValueError(
-                    "First argument must be a dict of Parameters, " \
-                    "got list of %s."%(type(p)))
-            param_list.append(p)
-            # Shared parameters have same uuid; only need to store one of the shared versions
-            if p._uuid in self._param2name:
-                continue
-            self._param2name[p._uuid] = name
-        params = param_list
-
+        if isinstance(params, (dict, OrderedDict)):
+            for key in sorted(list(params.keys())):
+                param_list.append(params[key])
+            params = param_list
+        if not isinstance(params, (list, tuple)):
+            raise ValueError(
+                "First argument must be a list or dict of Parameters, " \
+                "got %s."%(type(params)))
         self._params = []
         # parameters to initialize on the kvstore
         self._contains_sparse_weight = False
         self._contains_sparse_grad = False
+        self._param2idx = {}
         for i, param in enumerate(params):
+            if not isinstance(param, Parameter):
+                raise ValueError(
+                    "First argument must be a list or dict of Parameters, " \
+                    "got list of %s."%(type(param)))
             if param._uuid in self._param2idx:
                 # Shared parameters have same uuid; only need to store one of the shared versions
                 continue
diff --git a/tests/nightly/dist_async_kvstore.py b/tests/nightly/dist_async_kvstore.py
index 4e0fbf789ff2..f1bf13d93d37 100644
--- a/tests/nightly/dist_async_kvstore.py
+++ b/tests/nightly/dist_async_kvstore.py
@@ -31,7 +31,7 @@ def check_trainer_kv_update(weight_stype, update_on_kv):
         x = mx.gluon.Parameter('x', shape=(10,1), lr_mult=1.0, stype=weight_stype)
         x.initialize(ctx=[mx.cpu(0), mx.cpu(1)], init='zeros')
         try:
-            trainer = mx.gluon.Trainer({'x':x}, 'sgd', {'learning_rate': 0.1},
+            trainer = mx.gluon.Trainer([x], 'sgd', {'learning_rate': 0.1},
                                        kvstore=kv, update_on_kvstore=update_on_kv)
             trainer._init_kvstore()
             assert trainer._kv_initialized
diff --git a/tests/nightly/dist_device_sync_kvstore.py b/tests/nightly/dist_device_sync_kvstore.py
index c6b526f02228..b7b4e4c71f0e 100644
--- a/tests/nightly/dist_device_sync_kvstore.py
+++ b/tests/nightly/dist_device_sync_kvstore.py
@@ -109,7 +109,7 @@ def check_trainer_kv_update(update_on_kv):
     x = mx.gluon.Parameter('x', shape=(10,1), lr_mult=1.0)
     x.initialize(ctx=[mx.cpu(0), mx.cpu(1)], init='zeros')
     try:
-        trainer = mx.gluon.Trainer({'x':x}, 'sgd', {'learning_rate': 0.1},
+        trainer = mx.gluon.Trainer([x], 'sgd', {'learning_rate': 0.1},
                                    kvstore=kv, update_on_kvstore=update_on_kv)
         trainer._init_kvstore()
         assert trainer._kv_initialized
diff --git a/tests/nightly/dist_sync_kvstore.py b/tests/nightly/dist_sync_kvstore.py
index 4a4a5c8d9670..3f5137ba09b9 100644
--- a/tests/nightly/dist_sync_kvstore.py
+++ b/tests/nightly/dist_sync_kvstore.py
@@ -381,7 +381,7 @@ def test_gluon_trainer_type():
     def check_trainer_kv_type(stype, grad_stype, update_on_kv, expected):
         x = mx.gluon.Parameter('x', shape=(10,1), lr_mult=1.0, stype=stype, grad_stype=grad_stype)
         x.initialize(ctx=[mx.cpu(0), mx.cpu(1)], init='zeros')
-        trainer = mx.gluon.Trainer({'x':x}, 'sgd', {'learning_rate': 0.1},
+        trainer = mx.gluon.Trainer([x], 'sgd', {'learning_rate': 0.1},
                                    kvstore=kv, update_on_kvstore=update_on_kv)
         try:
             trainer._init_kvstore()
@@ -405,7 +405,7 @@ def check_trainer_step():
     shape = (10, 1)
     x = mx.gluon.Parameter('x', shape=shape)
     x.initialize(ctx=ctx, init='ones')
-    trainer = mx.gluon.Trainer({'x':x}, 'sgd', {'learning_rate': 1.0, 'multi_precision': False}, kvstore=kv)
+    trainer = mx.gluon.Trainer([x], 'sgd', {'learning_rate': 1.0, 'multi_precision': False}, kvstore=kv)
     with mx.autograd.record():
         w = x.data(ctx)
         y = (my_rank + 1) * w
@@ -423,7 +423,7 @@ def check_trainer_sparse_step():
     all_rows = mx.nd.arange(0, shape[0], ctx=ctx)
     x = mx.gluon.Parameter('x', shape=shape, stype='row_sparse', grad_stype='row_sparse')
     x.initialize(ctx=ctx, init='ones')
-    trainer = mx.gluon.Trainer({'x':x}, 'sgd', {'learning_rate': 1.0}, kvstore=kv)
+    trainer = mx.gluon.Trainer([x], 'sgd', {'learning_rate': 1.0}, kvstore=kv)
     with mx.autograd.record():
         w = x.row_sparse_data(all_rows)
         y = (my_rank + 1) * w
diff --git a/tests/python/unittest/test_gluon.py b/tests/python/unittest/test_gluon.py
index 4bd2171dfd18..71ae41c2cfa6 100644
--- a/tests/python/unittest/test_gluon.py
+++ b/tests/python/unittest/test_gluon.py
@@ -70,7 +70,7 @@ def test_sparse_parameter():
     assert len(p.list_grad()) == 2
     # getting row_sparse data without trainer throws an exception
     assertRaises(RuntimeError, p.list_row_sparse_data, row_id)
-    trainer = mx.gluon.Trainer({'p':p}, 'sgd')
+    trainer = mx.gluon.Trainer([p], 'sgd')
     assert len(p.list_row_sparse_data(row_id)) == 2
     weight = p.row_sparse_data(row_id)
     assert weight.context == mx.cpu(1)
@@ -104,7 +104,7 @@ def test_parameter_row_sparse_data():
     dim0 = 4
     x = gluon.Parameter('x', shape=(dim0, 2), stype='row_sparse')
     x.initialize(init='xavier', ctx=[ctx0, ctx1])
-    trainer = gluon.Trainer({'x':x}, 'sgd')
+    trainer = gluon.Trainer([x], 'sgd')
     x_param = x._data[0].copy()
     assert x_param.stype == 'row_sparse'
     row_id_0 = mx.nd.array([0,1], ctx=ctx0)
diff --git a/tests/python/unittest/test_gluon_trainer.py b/tests/python/unittest/test_gluon_trainer.py
index 5c94fc8d003c..d5e99a281aab 100644
--- a/tests/python/unittest/test_gluon_trainer.py
+++ b/tests/python/unittest/test_gluon_trainer.py
@@ -36,7 +36,7 @@ def test_multi_trainer():
     x = gluon.Parameter('x', shape=(10,), stype='row_sparse')
     x.initialize()
     # test set trainer
-    trainer0 = gluon.Trainer({'x':x}, 'sgd')
+    trainer0 = gluon.Trainer([x], 'sgd')
     assert(x._trainer() is trainer0)
     # test unset trainer
    x._set_trainer(None)
@@ -44,13 +44,13 @@
     x._set_trainer(trainer0)
     with pytest.raises(RuntimeError):
         # multiple trainers for a sparse Parameter is not allowed
-        trainer1 = gluon.Trainer({'x':x}, 'sgd')
+        trainer1 = gluon.Trainer([x], 'sgd')
 
 @with_seed()
 def test_trainer_with_sparse_grad_on_single_context():
     x = gluon.Parameter('x', shape=(10,), grad_stype='row_sparse')
     x.initialize(ctx=[mx.cpu(0)], init='zeros')
-    trainer = gluon.Trainer({'x':x}, 'sgd', {'learning_rate': 1.0, 'momentum': 0.5})
+    trainer = gluon.Trainer([x], 'sgd', {'learning_rate': 1.0, 'momentum': 0.5})
     with mx.autograd.record():
         for w in x.list_data():
             y = w + 1
@@ -66,7 +66,7 @@ def test_trainer_with_teststore():
     x = gluon.Parameter('x', shape=(10,))
     x.initialize(ctx=[mx.cpu(0), mx.cpu(1)], init='zeros')
     kv = mx.kv.create('teststore')
-    trainer = gluon.Trainer({'x':x}, 'sgd', {'learning_rate': 1.0, 'momentum': 0.5}, kvstore=kv)
+    trainer = gluon.Trainer([x], 'sgd', {'learning_rate': 1.0, 'momentum': 0.5}, kvstore=kv)
     with mx.autograd.record():
         for w in x.list_data():
             y = w + 1
@@ -77,14 +77,14 @@
     assert (x.data(mx.cpu(1)).asnumpy() == -2).all()
     # Expect exceptions if update_on_kvstore is set to True,
     # because TestStore does not support that
-    invalid_trainer = gluon.Trainer({'x':x}, 'sgd', kvstore=kv, update_on_kvstore=True)
+    invalid_trainer = gluon.Trainer([x], 'sgd', kvstore=kv, update_on_kvstore=True)
     pytest.raises(ValueError, invalid_trainer._init_kvstore)
 
 @with_seed()
 def test_trainer():
     x = gluon.Parameter('x', shape=(10,))
     x.initialize(ctx=[mx.cpu(0), mx.cpu(1)], init='zeros')
-    trainer = gluon.Trainer({'x':x}, 'sgd', {'learning_rate': 1.0, 'momentum': 0.5})
+    trainer = gluon.Trainer([x], 'sgd', {'learning_rate': 1.0, 'momentum': 0.5})
     with mx.autograd.record():
         for w in x.list_data():
             y = w + 1
@@ -119,7 +119,7 @@
 
     x = gluon.Parameter('x', shape=(10,))
     x.initialize(ctx=[mx.cpu(0), mx.cpu(1)], init='zeros')
-    trainer2 = gluon.Trainer({'x':x}, 'sgd', {'learning_rate': 1.0, 'momentum': 0.5},
+    trainer2 = gluon.Trainer([x], 'sgd', {'learning_rate': 1.0, 'momentum': 0.5},
                              update_on_kvstore=False)
     with mx.autograd.record():
         for i, w in enumerate(x.list_data()):
@@ -139,7 +139,7 @@ def test_trainer_save_load():
 
     x = gluon.Parameter('x', shape=(10,), lr_mult=1.0)
     x.initialize(ctx=[mx.cpu(0), mx.cpu(1)], init='zeros')
-    trainer = gluon.Trainer({'x':x}, 'sgd', {'learning_rate': 0.1})
+    trainer = gluon.Trainer([x], 'sgd', {'learning_rate': 0.1})
     with mx.autograd.record():
         for w in x.list_data():
             y = w + 1
@@ -158,7 +158,7 @@ def test_trainer_sparse_save_load():
 
     x = gluon.Parameter('x', shape=(10, 1), lr_mult=1.0, stype='row_sparse', grad_stype='row_sparse')
     x.initialize(ctx=[mx.cpu(0)], init='zeros')
-    trainer = gluon.Trainer({'x':x}, 'sgd', {'learning_rate': 0.1})
+    trainer = gluon.Trainer([x], 'sgd', {'learning_rate': 0.1})
     all_rows = mx.nd.arange(0, 10, ctx=mx.cpu(0))
     with mx.autograd.record():
         for w in x.list_row_sparse_data(all_rows):
@@ -257,7 +257,7 @@ def test_trainer_sparse_kv():
     def check_trainer_sparse_kv(kv, stype, grad_stype, update_on_kv, expected):
         x = mx.gluon.Parameter('x', shape=(10,1), lr_mult=1.0, stype=stype, grad_stype=grad_stype)
         x.initialize(ctx=[mx.cpu(0), mx.cpu(1)], init='zeros')
-        trainer = gluon.Trainer({'x':x}, 'sgd', {'learning_rate': 0.1},
+        trainer = gluon.Trainer([x], 'sgd', {'learning_rate': 0.1},
                                 kvstore=kv, update_on_kvstore=update_on_kv)
         all_rows = mx.nd.arange(0, 10, ctx=mx.cpu(0))
         try:
@@ -297,7 +297,7 @@ def test_trainer_lr_sched():
     factor = 0.1
     lr = 1
     lr_sched = mx.lr_scheduler.FactorScheduler(freq, factor=factor, base_lr=lr)
-    trainer = gluon.Trainer({'x':x}, 'sgd', {'learning_rate': lr, 'lr_scheduler': lr_sched})
+    trainer = gluon.Trainer([x], 'sgd', {'learning_rate': lr, 'lr_scheduler': lr_sched})
     for i in range(10):
         with mx.autograd.record():
             for w in x.list_data():
@@ -316,7 +316,7 @@
     factor = 0.1
     lr = 1
     lr_sched = mx.lr_scheduler.FactorScheduler(freq, factor=factor, base_lr=lr)
-    trainer = gluon.Trainer({'x':x}, 'sgd', {'learning_rate': lr, 'lr_scheduler': lr_sched},
+    trainer = gluon.Trainer([x], 'sgd', {'learning_rate': lr, 'lr_scheduler': lr_sched},
                             update_on_kvstore=False)
     for i in range(10):
         with mx.autograd.record():
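Note on usage (illustrative, not part of the patch): with this revert applied, the
Trainer constructor once again accepts either a list/tuple of Parameters or a dict of
Parameters; dict inputs are flattened into a list sorted by key before validation, and
any other type is rejected with a ValueError. A minimal sketch of the two call styles,
assuming an MXNet build with this patch applied:

    import mxnet as mx
    from mxnet import gluon

    x = gluon.Parameter('x', shape=(10,))
    x.initialize(ctx=[mx.cpu(0)], init='zeros')

    # list of Parameters: re-enabled by this revert
    trainer_from_list = gluon.Trainer([x], 'sgd', {'learning_rate': 0.1})

    # dict of Parameters: keys are sorted, values collected into a list
    trainer_from_dict = gluon.Trainer({'x': x}, 'sgd', {'learning_rate': 0.1})

    # anything else is rejected up front
    try:
        gluon.Trainer(x, 'sgd')  # a bare Parameter is neither list nor dict
    except ValueError as err:
        print(err)  # "First argument must be a list or dict of Parameters, ..."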