From ed7f6e56a4e372d5d460031186145065f5657893 Mon Sep 17 00:00:00 2001 From: Lai Wei Date: Wed, 3 Apr 2019 14:18:13 -0700 Subject: [PATCH] [MXNet-1340][Fit API]Update train stats (#14494) * add train history * update history * update test * avoid calling empty methods * remove train history object * fix pylint * add unit test * fix test * update categorize handlers --- python/mxnet/gluon/estimator/estimator.py | 147 +++++++------ python/mxnet/gluon/estimator/event_handler.py | 102 +++++---- python/mxnet/gluon/trainer.py | 7 + tests/python/unittest/test_gluon_estimator.py | 193 +++++++++++------- .../unittest/test_gluon_event_handler.py | 12 +- 5 files changed, 280 insertions(+), 181 deletions(-) diff --git a/python/mxnet/gluon/estimator/estimator.py b/python/mxnet/gluon/estimator/estimator.py index e759fa75e290..c5da0c0e5071 100644 --- a/python/mxnet/gluon/estimator/estimator.py +++ b/python/mxnet/gluon/estimator/estimator.py @@ -22,7 +22,7 @@ import copy import warnings -from .event_handler import LoggingHandler +from .event_handler import EventHandler, LoggingHandler from ... import gluon, autograd from ...context import Context, cpu, gpu, num_gpus from ...io import DataIter @@ -39,27 +39,26 @@ class Estimator(object): Parameters ---------- - loss : Loss or list of Loss + loss : gluon.loss.Loss or list of gluon.loss.Loss Loss(objective functions) to calculate during training metrics : EvalMetric or list of EvalMetric Metrics for evaluating models initializer : Initializer initializer to initialize the network - trainers : Trainer or list of Trainer - Trainers to apply optimizers on network parameters + trainer : Trainer + Trainer to apply optimizer on network parameters context : Context or list of Context devices to run the training on """ def __init__(self, net, - loss=None, + loss, metrics=None, initializer=None, - trainers=None, + trainer=None, context=None): self.net = net - self.stop_training = False if isinstance(loss, gluon.loss.Loss): self.loss = [loss] @@ -86,27 +85,14 @@ def __init__(self, net, # store training statistics self.train_stats = {} - self.train_stats['epochs'] = [] - self.train_stats['learning_rate'] = [] - # current step of the epoch - self.train_stats['step'] = '' - for metric in self.train_metrics: - # record a history of metrics over each epoch - self.train_stats['train_' + metric.name] = [] - # only record the latest metric numbers after each batch - self.train_stats['batch_' + metric.name] = 0. - for metric in self.val_metrics: - self.train_stats['val_' + metric.name] = [] + + # separate train and validation self.train_loss_metrics = [] self.val_loss_metrics = [] # using the metric wrapper for loss to record loss value for l in self.loss: self.train_loss_metrics.append(Loss(l.name)) self.val_loss_metrics.append(Loss(l.name)) - self.train_stats['train_' + l.name] = [] - self.train_stats['val_' + l.name] = [] - # only record the latest loss numbers after each batch - self.train_stats['batch_' + l.name] = 0. # handle context if isinstance(context, Context): @@ -127,7 +113,6 @@ def __init__(self, net, raise ValueError("context must be a Context or a list of Context, " "refer to mxnet.Context:{}".format(context)) - # initialize the network self.initializer = initializer if self.initializer: @@ -135,7 +120,7 @@ def __init__(self, net, # if already initialized, re-init with user specified initializer warnings.warn("Network already initialized, re-initializing with %s. 
" "You don't need to pass initializer if you already " - "initialized your net."% type(self.initializer).__name__) + "initialized your net." % type(self.initializer).__name__) self.net.initialize(init=self.initializer, ctx=self.context, force_reinit=True) else: # initialize with user specified initializer @@ -144,16 +129,17 @@ def __init__(self, net, if not self._is_initialized(): self.net.initialize(ctx=self.context) - # handle trainers - if isinstance(trainers, gluon.Trainer): - self.trainers = [trainers] - elif not trainers: + # handle trainer + if not trainer: warnings.warn("No trainer specified, default SGD optimizer " "with learning rate 0.001 is used.") - self.trainers = [gluon.Trainer(self.net.collect_params(), - 'sgd', {'learning_rate': 0.001})] + self.trainer = gluon.Trainer(self.net.collect_params(), + 'sgd', {'learning_rate': 0.001}) + elif not isinstance(trainer, gluon.Trainer): + raise ValueError("Trainer must be a Gluon Trainer instance, refer to " + "gluon.Trainer:{}".format(trainer)) else: - raise ValueError("Invalid trainer specified, please provide a valid gluon.Trainer") + self.trainer = trainer def _is_initialized(self): param_dict = self.net.collect_params() @@ -212,8 +198,12 @@ def evaluate(self, # update metrics for metric in self.val_metrics: metric.update(label, pred) + name, value = metric.get() + self.train_stats['val_' + name] = value for loss, loss_metric, in zip(losses, self.val_loss_metrics): loss_metric.update(0, [l for l in loss]) + name, value = loss_metric.get() + self.train_stats['val_' + name] = value def fit(self, train_data, val_data=None, @@ -241,27 +231,38 @@ def fit(self, train_data, from a data batch and load into contexts(devices) """ - - self.epochs = epochs + self.max_epoch = epochs if not batch_size: - batch_size = 32 * len(self.context) + self.batch_size = 32 * len(self.context) + else: + self.batch_size = batch_size + self.stop_training = False + self.samples = None + self.batch_idx = 0 event_handlers = event_handlers or [] # provide default logging handler if not event_handlers or \ not any(isinstance(handler, LoggingHandler) for handler in event_handlers): - event_handlers.append(LoggingHandler(self)) + event_handlers.append(LoggingHandler()) - # training begin + train_begin, epoch_begin, batch_begin, \ + batch_end, epoch_end, train_end = self._categorize_handlers(event_handlers) + + # passing estimator to event handlers so they can access estimator information + # when a event is triggered for handler in event_handlers: + handler.estimator = self + + # training begin + for handler in train_begin: handler.train_begin() - for epoch in range(epochs): + for epoch in range(self.max_epoch): # epoch begin - self.train_stats['epochs'].append(epoch) - self.train_stats['learning_rate'].append(self.trainers[0].learning_rate) + self.current_epoch = epoch - for handler in event_handlers: + for handler in epoch_begin: handler.epoch_begin() for metric in self.train_metrics + self.train_loss_metrics: @@ -282,7 +283,7 @@ def fit(self, train_data, data, label = batch_fn(batch, self.context) # batch begin - for handler in event_handlers: + for handler in batch_begin: handler.batch_begin() with autograd.record(): @@ -298,42 +299,64 @@ def fit(self, train_data, # update train metrics for metric in self.train_metrics: metric.update(label, pred) - self.train_stats['batch_' + metric.name] = metric.get()[1] + # get metric name and current value and update train stats + name, value = metric.get() + self.train_stats['train_' + name] = value + + # update loss for 
loss, loss_metric, in zip(losses, self.train_loss_metrics): loss_metric.update(0, [l for l in loss]) - self.train_stats['batch_' + loss_metric.name] = loss_metric.get()[1] - - try: - completed_samples = len(train_data._dataset) if i == len(train_data._dataset) - 1 \ - else batch_size * (i + 1) - # We need to check if this is the last batch in the current epoch and select - # the value to print appropriately - self.train_stats['step'] = "{}/{}".format(completed_samples, len(train_data._dataset)) - except AttributeError: - self.train_stats['step'] = i + name, value = loss_metric.get() + self.train_stats['train_' + name] = value - for trainer in self.trainers: - trainer.step(batch_size) + self.batch_idx = i + # record trained samples v.s. total samples if using Gluon DataLoader + if isinstance(train_data, gluon.data.DataLoader): + self.samples = "{}/{}".format(self.batch_size * (i + 1), len(train_data._dataset)) + self.trainer.step(self.batch_size) # batch end - for handler in event_handlers: + for handler in batch_end: handler.batch_end() if val_data: self.evaluate(val_data, batch_fn) - for metric in self.train_metrics + self.train_loss_metrics: - self.train_stats['train_' + metric.name].append(metric.get()[1]) - for metric in self.val_metrics + self.val_loss_metrics: - self.train_stats['val_' + metric.name].append(metric.get()[1]) - # epoch end - for handler in event_handlers: + for handler in epoch_end: handler.epoch_end() if self.stop_training: break # train end - for handler in event_handlers: + for handler in train_end: handler.train_end() + + def _categorize_handlers(self, event_handlers): + """ + categorize handlers into 6 event lists to avoid calling empty methods + for example, only event handlers with train_begin method + implemented will be called at train begin + """ + + train_begin = [] + epoch_begin = [] + batch_begin = [] + batch_end = [] + epoch_end = [] + train_end = [] + for handler in event_handlers: + if not handler.__class__.train_begin == EventHandler.train_begin: + train_begin.append(handler) + if not handler.__class__.epoch_begin == EventHandler.epoch_begin: + epoch_begin.append(handler) + if not handler.__class__.batch_begin == EventHandler.batch_begin: + batch_begin.append(handler) + if not handler.__class__.batch_end == EventHandler.batch_end: + batch_end.append(handler) + if not handler.__class__.epoch_end == EventHandler.epoch_end: + epoch_end.append(handler) + if not handler.__class__.train_end == EventHandler.train_end: + train_end.append(handler) + return train_begin, epoch_begin, batch_begin, batch_end, epoch_end, train_end diff --git a/python/mxnet/gluon/estimator/event_handler.py b/python/mxnet/gluon/estimator/event_handler.py index c59644e8f726..781007464954 100644 --- a/python/mxnet/gluon/estimator/event_handler.py +++ b/python/mxnet/gluon/estimator/event_handler.py @@ -40,7 +40,16 @@ class EventHandler(object): estimator : Estimator The :py:class:`Estimator` to get training statistics """ - def __init__(self, estimator): + + def __init__(self): + self._estimator = None + + @property + def estimator(self): + return self._estimator + + @estimator.setter + def estimator(self, estimator): self._estimator = estimator def train_begin(self): @@ -78,8 +87,8 @@ class LoggingHandler(EventHandler): file location to save the logs """ - def __init__(self, estimator, file_name=None, file_location=None, ): - super(LoggingHandler, self).__init__(estimator) + def __init__(self, file_name=None, file_location=None): + super(LoggingHandler, self).__init__() self.logger = 
logging.getLogger(__name__)
         self.logger.setLevel(logging.INFO)
         stream_handler = logging.StreamHandler()
@@ -92,22 +101,37 @@ def __init__(self, estimator, file_name=None, file_location=None, ):
             self.logger.addHandler(file_handler)
 
     def train_begin(self):
-        pass
+        self.train_start = time.time()
+        self.logger.info("Training begin: using optimizer %s "
+                         "with current learning rate %.4f ",
+                         self.estimator.trainer.optimizer.__class__.__name__,
+                         self.estimator.trainer.learning_rate)
+        self.logger.info("Train for %d epochs.", self.estimator.max_epoch)
 
     def train_end(self):
-        pass
+        train_time = time.time() - self.train_start
+        epoch = self.estimator.current_epoch
+        msg = 'Train finished in %ds at epoch %d. ' % (train_time, epoch)
+        # log every result in train stats, including train/validation loss & metrics
+        for key in self.estimator.train_stats:
+            msg += '%s: %.4f ' % (key, self.estimator.train_stats[key])
+        self.logger.info(msg)
 
     def batch_begin(self):
         self.batch_start = time.time()
 
     def batch_end(self):
         batch_time = time.time() - self.batch_start
-        epoch = self._estimator.train_stats['epochs'][-1]
-        step = self._estimator.train_stats['step']
-        msg = '[Epoch %d] [Step %s] time/step: %.3fs ' % (epoch, step, batch_time)
-        for key in self._estimator.train_stats.keys():
-            if key.startswith('batch_'):
-                msg += key[6:] + ': ' + '%.4f ' % self._estimator.train_stats[key]
+        epoch = self.estimator.current_epoch
+        batch = self.estimator.batch_idx
+        msg = '[Epoch %d] [Batch %d] ' % (epoch, batch)
+        if self.estimator.samples:
+            msg += '[Samples %s] ' % (self.estimator.samples)
+        msg += 'time/batch: %.3fs ' % batch_time
+        for key in self.estimator.train_stats:
+            # only log the current training loss & metrics after each batch
+            if key.startswith('train_'):
+                msg += key + ': ' + '%.4f ' % self.estimator.train_stats[key]
         self.logger.info(msg)
 
     def epoch_begin(self):
@@ -115,11 +139,11 @@ def epoch_begin(self):
 
     def epoch_end(self):
         epoch_time = time.time() - self.epoch_start
-        epoch = self._estimator.train_stats['epochs'][-1]
+        epoch = self.estimator.current_epoch
         msg = '\n[Epoch %d] finished in %.3fs: ' % (epoch, epoch_time)
-        for key in self._estimator.train_stats.keys():
-            if key.startswith('train_') or key.startswith('val_'):
-                msg += key + ': ' + '%.4f ' % self._estimator.train_stats[key][epoch]
+        # log every result in train stats, including train/validation loss & metrics
+        for key in self.estimator.train_stats:
+            msg += '%s: %.4f ' % (key, self.estimator.train_stats[key])
         self.logger.info(msg)
 
 
@@ -148,14 +172,14 @@ class CheckpointHandler(EventHandler):
         intervals between saving the network
     """
 
-    def __init__(self, estimator,
-                 filepath,
-                 monitor='val_loss',
+    def __init__(self,
+                 filepath,
+                 monitor='val_accuracy',
                  verbose=0,
                  save_best_only=False,
                  mode='auto',
                  period=1):
-        super(CheckpointHandler, self).__init__(estimator)
+        super(CheckpointHandler, self).__init__()
         self.monitor = monitor
         self.verbose = verbose
         self.filepath = filepath
@@ -186,7 +210,7 @@ def __init__(self, estimator,
             self.best = np.Inf
 
     def epoch_end(self, ):
-        epoch = self._estimator.train_stats['epochs'][-1]
+        epoch = self.estimator.current_epoch
         # add extension for weights
         if '.params' not in self.filepath:
             self.filepath += '.params'
@@ -194,20 +218,21 @@ def epoch_end(self, ):
         if self.epochs_since_last_save >= self.period:
             self.epochs_since_last_save = 0
             if self.save_best_only:
-                # check if monitor exists in train_stats
-                if self.monitor not in self._estimator.train_stats:
-                    warnings.warn(RuntimeWarning('Unable to find %s in training statistics, make sure'
-                                                 'you are passing one of the metric names as monitor', self.monitor))
-                    self._estimator.net.save_parameters(self.filepath)
+                # check if monitor exists in train stats
+                if self.monitor not in self.estimator.train_stats:
+                    warnings.warn(RuntimeWarning('Unable to find %s in training statistics; make sure the monitor '
+                                                 'value starts with `train_` or `val_` and contains the loss/metric '
+                                                 'name, for example `val_accuracy`' % self.monitor))
+                    self.estimator.net.save_parameters(self.filepath)
                 else:
-                    current = self._estimator.train_stats[self.monitor][-1]
+                    current = self.estimator.train_stats[self.monitor]
                     if self.monitor_op(current, self.best):
                         if self.verbose > 0:
                             self.logger.info('\n[Epoch %d] %s improved from %0.5f to %0.5f,'
                                              ' saving model to %s',
                                              epoch, self.monitor, self.best, current, self.filepath)
                         self.best = current
-                        self._estimator.net.save_parameters(self.filepath)
+                        self.estimator.net.save_parameters(self.filepath)
                     else:
                         if self.verbose > 0:
                             self.logger.info('\n[Epoch %d] %s did not improve from %0.5f, skipping save model',
@@ -215,7 +240,7 @@ def epoch_end(self, ):
         else:
             if self.verbose > 0:
                 logging.info('\nEpoch %d: saving model to %s', epoch, self.filepath)
-            self._estimator.net.save_parameters(self.filepath)
+            self.estimator.net.save_parameters(self.filepath)
 
 
 class EarlyStoppingHandler(EventHandler):
@@ -238,15 +263,14 @@ class EarlyStoppingHandler(EventHandler):
         baseline value to compare the monitored value with
     """
 
-    def __init__(self, estimator,
-                 monitor='val_loss',
+    def __init__(self,
+                 monitor='val_accuracy',
                  min_delta=0,
                  patience=0,
                  mode='auto',
                  baseline=None):
-        super(EarlyStoppingHandler, self).__init__(estimator)
+        super(EarlyStoppingHandler, self).__init__()
 
-        self._estimator = estimator
         self.monitor = monitor
         self.baseline = baseline
         self.patience = patience
@@ -284,15 +308,13 @@ def train_begin(self):
             self.best = np.Inf if self.monitor_op == np.less else -np.Inf
 
     def epoch_end(self):
-        epoch = self._estimator.train_stats['epochs'][-1]
-        if self.monitor not in self._estimator.train_stats:
-            warnings.warn(RuntimeWarning('Unable to find %s in training statistics, make sure'
-                                         'you are passing one of the metric names as monitor', self.monitor))
+        epoch = self.estimator.current_epoch
+        if self.monitor not in self.estimator.train_stats:
+            warnings.warn(RuntimeWarning('Unable to find %s in training statistics; make sure the monitor '
+                                         'value starts with `train_` or `val_` and contains the loss/metric '
+                                         'name, for example `val_accuracy`' % self.monitor))
         else:
-            current = self._estimator.train_stats[self.monitor][-1]
-            if current is None:
-                return
-
+            current = self.estimator.train_stats[self.monitor]
             if self.monitor_op(current - self.min_delta, self.best):
                 self.best = current
                 self.wait = 0
@@ -300,7 +322,7 @@ def epoch_end(self):
                 self.wait += 1
                 if self.wait >= self.patience:
                     self.stopped_epoch = epoch
-                    self._estimator.stop_training = True
+                    self.estimator.stop_training = True
 
     def train_end(self):
         if self.stopped_epoch > 0:
diff --git a/python/mxnet/gluon/trainer.py b/python/mxnet/gluon/trainer.py
index 8060f38ac2aa..44e895407ccf 100644
--- a/python/mxnet/gluon/trainer.py
+++ b/python/mxnet/gluon/trainer.py
@@ -259,6 +259,13 @@ def learning_rate(self):
         else:
             return self._optimizer.learning_rate
 
+    @property
+    def optimizer(self):
+        if isinstance(self._optimizer, opt.Optimizer):
+            return self._optimizer
+        else:
+            raise UserWarning("Optimizer has not been initialized yet")
+
     def set_learning_rate(self, lr):
         """Sets a new learning rate of the optimizer.
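A minimal usage sketch of the reworked API for reviewers, assuming this patch is applied; the toy network, data shapes, and hyperparameters below are illustrative and not part of the change. It shows the single-`trainer` constructor and the new `Trainer.optimizer` property that `LoggingHandler.train_begin` relies on:

    import mxnet as mx
    from mxnet import gluon
    from mxnet.gluon import nn
    from mxnet.gluon.estimator import Estimator

    # Estimator now takes exactly one Trainer via `trainer` (the old API
    # accepted a list via `trainers`); anything else raises ValueError.
    net = nn.Sequential()
    net.add(nn.Dense(4, activation='relu', flatten=False))
    net.initialize(ctx=mx.cpu())
    trainer = gluon.Trainer(net.collect_params(), 'sgd',
                            {'learning_rate': 0.001})
    est = Estimator(net=net,
                    loss=gluon.loss.L2Loss(),
                    metrics=mx.metric.Accuracy(),
                    trainer=trainer,
                    context=mx.cpu())

    # The new Trainer.optimizer property exposes the wrapped optimizer;
    # LoggingHandler uses it to report the optimizer name at train begin.
    print(est.trainer.optimizer.__class__.__name__)  # 'SGD'
    print(est.trainer.learning_rate)                 # 0.001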
diff --git a/tests/python/unittest/test_gluon_estimator.py b/tests/python/unittest/test_gluon_estimator.py index 85e61ceb364d..25a410e93479 100644 --- a/tests/python/unittest/test_gluon_estimator.py +++ b/tests/python/unittest/test_gluon_estimator.py @@ -17,14 +17,15 @@ ''' Unit tests for Gluon Estimator ''' -import unittest import sys +import unittest import warnings -from nose.tools import assert_raises + import mxnet as mx from mxnet import gluon from mxnet.gluon import nn -from mxnet.gluon.estimator import estimator +from mxnet.gluon.estimator import Estimator, EventHandler +from nose.tools import assert_raises def get_model(): @@ -43,11 +44,11 @@ def test_fit(): acc = mx.metric.Accuracy() net.initialize(ctx=ctx) trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001}) - est = estimator.Estimator(net=net, - loss=loss, - metrics=acc, - trainers=trainer, - context=ctx) + est = Estimator(net=net, + loss=loss, + metrics=acc, + trainer=trainer, + context=ctx) in_data = mx.nd.random.uniform(shape=(10, 3)) out_data = mx.nd.random.uniform(shape=(10, 4)) # Input dataloader @@ -80,11 +81,11 @@ def test_validation(): acc = mx.metric.Accuracy() net.initialize(ctx=ctx) trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001}) - est = estimator.Estimator(net=net, - loss=loss, - metrics=acc, - trainers=trainer, - context=ctx) + est = Estimator(net=net, + loss=loss, + metrics=acc, + trainer=trainer, + context=ctx) in_data = mx.nd.random.uniform(shape=(10, 3)) out_data = mx.nd.random.uniform(shape=(10, 4)) # Input dataloader @@ -125,10 +126,10 @@ def test_initializer(): loss = gluon.loss.L2Loss() acc = mx.metric.Accuracy() # no initializer - est = estimator.Estimator(net=net, - loss=loss, - metrics=acc, - context=ctx) + est = Estimator(net=net, + loss=loss, + metrics=acc, + context=ctx) est.fit(train_data=train_data, epochs=num_epochs, batch_size=batch_size) @@ -139,12 +140,12 @@ def test_initializer(): trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001}) # catch reinit warning with warnings.catch_warnings(record=True) as w: - est = estimator.Estimator(net=net, - loss=loss, - metrics=acc, - initializer=mx.init.MSRAPrelu(), - trainers=trainer, - context=ctx) + est = Estimator(net=net, + loss=loss, + metrics=acc, + initializer=mx.init.MSRAPrelu(), + trainer=trainer, + context=ctx) assert 'Network already initialized' in str(w[-1].message) est.fit(train_data=train_data, epochs=num_epochs, @@ -167,10 +168,10 @@ def test_trainer(): net.initialize(ctx=ctx) # input no trainer with warnings.catch_warnings(record=True) as w: - est = estimator.Estimator(net=net, - loss=loss, - metrics=acc, - context=ctx) + est = Estimator(net=net, + loss=loss, + metrics=acc, + context=ctx) assert 'No trainer specified' in str(w[-1].message) est.fit(train_data=train_data, epochs=num_epochs, @@ -179,11 +180,11 @@ def test_trainer(): # input invalid trainer trainer = 'sgd' with assert_raises(ValueError): - est = estimator.Estimator(net=net, - loss=loss, - metrics=acc, - trainers=trainer, - context=ctx) + est = Estimator(net=net, + loss=loss, + metrics=acc, + trainer=trainer, + context=ctx) def test_metric(): @@ -200,59 +201,54 @@ def test_metric(): net.initialize(ctx=ctx) trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001}) # input no metric - est = estimator.Estimator(net=net, - loss=loss, - trainers=trainer, - context=ctx) + est = Estimator(net=net, + loss=loss, + trainer=trainer, + context=ctx) est.fit(train_data=train_data, 
epochs=num_epochs, batch_size=batch_size) # input list of metrics metrics = [mx.metric.Accuracy(), mx.metric.Accuracy()] - est = estimator.Estimator(net=net, - loss=loss, - metrics=metrics, - trainers=trainer, - context=ctx) + est = Estimator(net=net, + loss=loss, + metrics=metrics, + trainer=trainer, + context=ctx) est.fit(train_data=train_data, epochs=num_epochs, batch_size=batch_size) # input invalid metric with assert_raises(ValueError): - est = estimator.Estimator(net=net, - loss=loss, - metrics='acc', - trainers=trainer, - context=ctx) + est = Estimator(net=net, + loss=loss, + metrics='acc', + trainer=trainer, + context=ctx) # test default metric loss = gluon.loss.SoftmaxCrossEntropyLoss() - est = estimator.Estimator(net=net, - loss=loss, - trainers=trainer, - context=ctx) + est = Estimator(net=net, + loss=loss, + trainer=trainer, + context=ctx) assert isinstance(est.train_metrics[0], mx.metric.Accuracy) def test_loss(): - ''' test with no loss, invalid loss ''' + ''' test with invalid loss ''' net = get_model() ctx = mx.cpu() acc = mx.metric.Accuracy() net.initialize(ctx=ctx) trainer = gluon.Trainer(net.collect_params(), 'sgd', {'learning_rate': 0.001}) - # input no loss - with assert_raises(ValueError): - est = estimator.Estimator(net=net, - trainers=trainer, - metrics=acc, - context=ctx) # input invalid loss with assert_raises(ValueError): - est = estimator.Estimator(net=net, - loss='mse', - metrics=acc, - trainers=trainer, - context=ctx) + est = Estimator(net=net, + loss='mse', + metrics=acc, + trainer=trainer, + context=ctx) + def test_context(): ''' test with no context, list of context, invalid context ''' @@ -260,18 +256,69 @@ def test_context(): loss = gluon.loss.L2Loss() metrics = mx.metric.Accuracy() # input no context - est = estimator.Estimator(net=net, - loss=loss, - metrics=metrics) + est = Estimator(net=net, + loss=loss, + metrics=metrics) # input list of context ctx = [mx.gpu(0), mx.gpu(1)] - est = estimator.Estimator(net=net, - loss=loss, - metrics=metrics, - context=ctx) + est = Estimator(net=net, + loss=loss, + metrics=metrics, + context=ctx) # input invalid context with assert_raises(ValueError): - est = estimator.Estimator(net=net, - loss=loss, - metrics=metrics, - context='cpu') + est = Estimator(net=net, + loss=loss, + metrics=metrics, + context='cpu') + + +def test_categorize_handlers(): + class CustomHandler1(EventHandler): + def __init__(self): + super(CustomHandler1, self).__init__() + + def train_begin(self): + print("custom train begin") + + class CustomHandler2(EventHandler): + def __init__(self): + super(CustomHandler2, self).__init__() + + def epoch_begin(self): + print("custom epoch begin") + + def batch_begin(self): + print("custom batch begin") + + def train_end(self): + print("custom train end") + + class CustomHandler3(EventHandler): + def __init__(self): + super(CustomHandler3, self).__init__() + + def epoch_begin(self): + print("custom epoch begin") + + def batch_begin(self): + print("custom batch begin") + + def batch_end(self): + print("custom batch end") + + def train_end(self): + print("custom train end") + + net = nn.Sequential() + net.add(nn.Dense(10)) + loss = gluon.loss.SoftmaxCrossEntropyLoss() + est = Estimator(net, loss=loss) + event_handlers = [CustomHandler1(), CustomHandler2(), CustomHandler3()] + train_begin, epoch_begin, batch_begin, \ + batch_end, epoch_end, train_end = est._categorize_handlers(event_handlers) + assert len(train_begin) == 1 + assert len(epoch_begin) == 2 + assert len(batch_begin) == 2 + assert len(batch_end) == 
1 + assert len(train_end) == 2 diff --git a/tests/python/unittest/test_gluon_event_handler.py b/tests/python/unittest/test_gluon_event_handler.py index a551594d6430..ccbcb54b226b 100644 --- a/tests/python/unittest/test_gluon_event_handler.py +++ b/tests/python/unittest/test_gluon_event_handler.py @@ -45,7 +45,7 @@ def test_checkpoint_handler(): ce_loss = loss.SoftmaxCrossEntropyLoss() acc = mx.metric.Accuracy() est = estimator.Estimator(net, loss=ce_loss, metrics=acc) - checkpoint_handler = [event_handler.CheckpointHandler(est, file_path, + checkpoint_handler = [event_handler.CheckpointHandler(file_path, save_best_only=save_best_only, mode=mode)] est.fit(test_data, event_handlers=checkpoint_handler, epochs=1) @@ -63,15 +63,15 @@ def test_early_stopping(): ce_loss = loss.SoftmaxCrossEntropyLoss() acc = mx.metric.Accuracy() est = estimator.Estimator(net, loss=ce_loss, metrics=acc) - early_stopping = [event_handler.EarlyStoppingHandler(est, monitor, + early_stopping = [event_handler.EarlyStoppingHandler(monitor, patience=patience, - mode=mode)] - est.fit(test_data, event_handlers=early_stopping, epochs=1) + mode=mode)] + est.fit(test_data, event_handlers=early_stopping, epochs=3) mode = 'auto' monitor = 'train_accuracy' patience = 2 - early_stopping = [event_handler.EarlyStoppingHandler(est, monitor, + early_stopping = [event_handler.EarlyStoppingHandler(monitor, patience=patience, mode=mode)] est.fit(test_data, event_handlers=early_stopping, epochs=1) @@ -86,7 +86,7 @@ def test_logging(): ce_loss = loss.SoftmaxCrossEntropyLoss() acc = mx.metric.Accuracy() est = estimator.Estimator(net, loss=ce_loss, metrics=acc) - logging_handler = [event_handler.LoggingHandler(est, file_name=file_name, file_location=tmpdir)] + logging_handler = [event_handler.LoggingHandler(file_name=file_name, file_location=tmpdir)] est.fit(test_data, event_handlers=logging_handler, epochs=1) assert os.path.isfile(output_dir) os.remove(output_dir) \ No newline at end of file
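For reviewers, an end-to-end sketch of the new handler flow, assuming this patch is applied: handlers no longer receive the estimator in their constructor; `fit` injects it through the `estimator` setter, and `_categorize_handlers` dispatches only the callbacks a handler actually overrides. `LRTracker` is a hypothetical handler, and the toy model and data mirror the unit tests above:

    import mxnet as mx
    from mxnet import gluon
    from mxnet.gluon import nn
    from mxnet.gluon.estimator import Estimator, EventHandler

    class LRTracker(EventHandler):
        # only epoch_end is overridden, so _categorize_handlers puts this
        # handler in the epoch_end list alone; no empty callbacks are run
        def __init__(self):
            super(LRTracker, self).__init__()
            self.lr_history = []

        def epoch_end(self):
            # self.estimator is injected by fit() before training starts
            self.lr_history.append(self.estimator.trainer.learning_rate)

    net = nn.Sequential()
    net.add(nn.Dense(4, activation='relu', flatten=False))
    est = Estimator(net, loss=gluon.loss.L2Loss(), metrics=mx.metric.Accuracy())

    x = mx.nd.random.uniform(shape=(10, 3))
    y = mx.nd.random.uniform(shape=(10, 4))
    train_data = gluon.data.DataLoader(
        gluon.data.dataset.ArrayDataset(x, y), batch_size=5)

    tracker = LRTracker()
    # no trainer was given, so Estimator warns and creates the default SGD
    # trainer with learning rate 0.001; fit() sets tracker.estimator = est
    est.fit(train_data=train_data, event_handlers=[tracker],
            epochs=2, batch_size=5)
    print(tracker.lr_history)  # [0.001, 0.001]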