From 90f5868b8e379d3560a9d9aef976a84be57ca8b8 Mon Sep 17 00:00:00 2001
From: jiamingy <jm.yuan@outlook.com>
Date: Wed, 5 May 2021 17:41:46 +0800
Subject: [PATCH] Add tolerance parameter to EarlyStopping callback.

---
 python-package/xgboost/callback.py | 21 +++++++++++++--------
 tests/python/test_callback.py      | 19 +++++++++++++++++++
 2 files changed, 32 insertions(+), 8 deletions(-)

diff --git a/python-package/xgboost/callback.py b/python-package/xgboost/callback.py
index e267abe37c40..6f8533df8883 100644
--- a/python-package/xgboost/callback.py
+++ b/python-package/xgboost/callback.py
@@ -493,19 +493,17 @@ def __init__(self,
                  metric_name: Optional[str] = None,
                  data_name: Optional[str] = None,
                  maximize: Optional[bool] = None,
-                 save_best: Optional[bool] = False) -> None:
+                 save_best: Optional[bool] = False,
+                 tolerance: float = 0) -> None:
         self.data = data_name
         self.metric_name = metric_name
         self.rounds = rounds
         self.save_best = save_best
         self.maximize = maximize
         self.stopping_history: CallbackContainer.EvalsLog = {}
+        self._tol = tolerance
 
-        if self.maximize is not None:
-            if self.maximize:
-                self.improve_op = lambda x, y: x > y
-            else:
-                self.improve_op = lambda x, y: x < y
+        self.improve_op = None
 
         self.current_rounds: int = 0
         self.best_scores: dict = {}
@@ -523,11 +521,18 @@ def _update_rounds(self, score, name, metric, model, epoch) -> bool:
             maximize_metrics = ('auc', 'aucpr', 'map', 'ndcg', 'auc@',
                                 'aucpr@', 'map@', 'ndcg@')
             if any(metric.startswith(x) for x in maximize_metrics):
-                self.improve_op = lambda x, y: x > y
+                self.improve_op = lambda x, y: x - y > -self._tol
                 self.maximize = True
             else:
-                self.improve_op = lambda x, y: x < y
+                self.improve_op = lambda x, y: y - x > -self._tol
                 self.maximize = False
+        else:
+            if self.maximize:
+                self.improve_op = lambda x, y: x - y > -self._tol
+            else:
+                self.improve_op = lambda x, y: y - x > -self._tol
+
+        assert self.improve_op
 
         if not self.stopping_history:  # First round
             self.current_rounds = 0
diff --git a/tests/python/test_callback.py b/tests/python/test_callback.py
index e9214822eaa2..5d2103a1f70d 100644
--- a/tests/python/test_callback.py
+++ b/tests/python/test_callback.py
@@ -126,6 +126,25 @@ def test_early_stopping_customize(self):
         assert len(dump) - booster.best_iteration == early_stopping_rounds + 1
         assert len(early_stop.stopping_history['Train']['CustomErr']) == len(dump)
 
+        tol = 10
+        early_stop = xgb.callback.EarlyStopping(
+            rounds=early_stopping_rounds,
+            metric_name='CustomErr',
+            data_name='Train',
+            tolerance=tol
+        )
+        booster = xgb.train(
+            {'objective': 'binary:logistic',
+             'eval_metric': ['error', 'rmse'],
+             'tree_method': 'hist'}, D_train,
+            evals=[(D_train, 'Train'), (D_valid, 'Valid')],
+            feval=tm.eval_error_metric,
+            num_boost_round=100,
+            callbacks=[early_stop],
+            verbose_eval=False)
+
+        assert booster.best_iteration == 100 - tol
+
     def test_early_stopping_skl(self):
         from sklearn.datasets import load_breast_cancer
         X, y = load_breast_cancer(return_X_y=True)