Track epoch metric separately

apache · Aug 31, 2018 · 2840a58
1 parent 4033cdd
commit 2840a58
Show file tree

Hide file tree

Showing 2 changed files with 15 additions and 5 deletions.
diff --git a/python/mxnet/callback.py b/python/mxnet/callback.py
@@ -165,9 +165,13 @@ def __call__(self, param):
                     name_value = param.eval_metric.get_name_value()
                     if self.auto_reset:
                         param.eval_metric.reset()
-                    msg = 'Epoch[%d] Batch [%d]\tSpeed: %.2f samples/sec'
-                    msg += '\t%s=%f'*len(name_value)
-                    logging.info(msg, param.epoch, count, speed, *sum(name_value, ()))
+                        msg = 'Epoch[%d] Batch [%d-%d]\tSpeed: %.2f samples/sec'
+                        msg += '\t%s=%f'*len(name_value)
+                        logging.info(msg, param.epoch, count-self.frequent, count, speed, *sum(name_value, ()))
+                    else:
+                        msg = 'Epoch[%d] Batch [0-%d]\tSpeed: %.2f samples/sec'
+                        msg += '\t%s=%f'*len(name_value)
+                        logging.info(msg, param.epoch, count, speed, *sum(name_value, ()))
                 else:
                     logging.info("Iter[%d] Batch [%d]\tSpeed: %.2f samples/sec",
                                  param.epoch, count, speed)

diff --git a/python/mxnet/module/base_module.py b/python/mxnet/module/base_module.py
@@ -507,13 +507,15 @@ def fit(self, train_data, eval_data=None, eval_metric='acc',
             validation_metric = eval_metric
         if not isinstance(eval_metric, metric.EvalMetric):
             eval_metric = metric.create(eval_metric)
+            epoch_eval_metric = metric.create(eval_metric)
 
         ################################################################################
         # training loop
         ################################################################################
         for epoch in range(begin_epoch, num_epoch):
             tic = time.time()
             eval_metric.reset()
+            epoch_eval_metric.reset()
             nbatch = 0
             data_iter = iter(train_data)
             end_of_batch = False
@@ -529,8 +531,12 @@ def fit(self, train_data, eval_data=None, eval_metric='acc',
                     self.update_metric(eval_metric,
                                        [db.label for db in data_batch],
                                        pre_sliced=True)
+                    self.update_metric(epoch_eval_metric,
+                                       [db.label for db in data_batch],
+                                       pre_sliced=True)
                 else:
                     self.update_metric(eval_metric, data_batch.label)
+                    self.update_metric(epoch_eval_metric, data_batch.label)
 
                 try:
                     # pre fetch next batch
@@ -543,7 +549,7 @@ def fit(self, train_data, eval_data=None, eval_metric='acc',
                     monitor.toc_print()
 
                 if end_of_batch:
-                    eval_name_vals = eval_metric.get_name_value()
+                    eval_name_vals = epoch_eval_metric.get_name_value()
 
                 if batch_end_callback is not None:
                     batch_end_params = BatchEndParam(epoch=epoch, nbatch=nbatch,
@@ -555,7 +561,7 @@ def fit(self, train_data, eval_data=None, eval_metric='acc',
 
             # one epoch of training is finished
             for name, val in eval_name_vals:
-                self.logger.info('Epoch[%d] Train-%s=%f', epoch, name, val)
+                self.logger.info('Epoch[%d] Train-%s (averaged over entire epoch)=%f', epoch, name, val)
             toc = time.time()
             self.logger.info('Epoch[%d] Time cost=%.3f', epoch, (toc-tic))