From 79c70076c05cd57243726d77e07213b60c11ca02 Mon Sep 17 00:00:00 2001
From: James Lamb <jaylamb20@gmail.com>
Date: Sun, 25 Oct 2020 23:55:42 -0500
Subject: [PATCH 01/14] [ci] [python] reduce unnecessary data loading in tests

---
 tests/python_package_test/test_basic.py   | 13 +++-
 tests/python_package_test/test_engine.py  | 93 ++++++++++++++---------
 tests/python_package_test/test_sklearn.py | 81 +++++++++++++-------
 3 files changed, 118 insertions(+), 69 deletions(-)

diff --git a/tests/python_package_test/test_basic.py b/tests/python_package_test/test_basic.py
index a3d6b8eced3a..d68651d3b236 100644
--- a/tests/python_package_test/test_basic.py
+++ b/tests/python_package_test/test_basic.py
@@ -3,6 +3,8 @@
 import tempfile
 import unittest
 
+from functools import lru_cache
+
 import lightgbm as lgb
 import numpy as np
 
@@ -11,10 +13,15 @@
 from sklearn.model_selection import train_test_split
 
 
+@lru_cache(maxsize=None)  # call form is required: bare @lru_cache only works on Python 3.8+
+def _load_breast_cancer(**kwargs):  # memoized wrapper around sklearn's load_breast_cancer
+    return load_breast_cancer(**kwargs)  # cached per kwargs; every caller gets the same object, so do not mutate it
+
+
 class TestBasic(unittest.TestCase):
 
     def test(self):
-        X_train, X_test, y_train, y_test = train_test_split(*load_breast_cancer(return_X_y=True),
+        X_train, X_test, y_train, y_test = train_test_split(*_load_breast_cancer(return_X_y=True),
                                                             test_size=0.1, random_state=2)
         train_data = lgb.Dataset(X_train, label=y_train)
         valid_data = train_data.create_valid(X_test, label=y_test)
@@ -86,7 +93,7 @@ def test(self):
         os.remove(tname)
 
     def test_chunked_dataset(self):
-        X_train, X_test, y_train, y_test = train_test_split(*load_breast_cancer(return_X_y=True), test_size=0.1, random_state=2)
+        X_train, X_test, y_train, y_test = train_test_split(*_load_breast_cancer(return_X_y=True), test_size=0.1, random_state=2)
 
         chunk_size = X_train.shape[0] // 10 + 1
         X_train = [X_train[i * chunk_size:(i + 1) * chunk_size, :] for i in range(X_train.shape[0] // chunk_size + 1)]
@@ -273,7 +280,7 @@ def check_asserts(data):
             self.assertAlmostEqual(data.label[1], data.weight[1])
             self.assertListEqual(data.feature_name, data.get_feature_name())
 
-        X, y = load_breast_cancer(return_X_y=True)
+        X, y = _load_breast_cancer(return_X_y=True)
         sequence = np.ones(y.shape[0])
         sequence[0] = np.nan
         sequence[1] = np.inf
diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 0c90a6bada87..fa069e830d69 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -7,6 +7,8 @@
 import random
 import unittest
 
+from functools import lru_cache
+
 import lightgbm as lgb
 import numpy as np
 from scipy.sparse import csr_matrix, isspmatrix_csr, isspmatrix_csc
@@ -51,9 +53,26 @@ def categorize(continuous_x):
     return np.digitize(continuous_x, bins=np.arange(0, 1, 0.01))
 
 
+@lru_cache(maxsize=None)  # call form is required: bare @lru_cache only works on Python 3.8+
+def _load_boston(**kwargs):  # memoized wrapper around sklearn's load_boston
+    return load_boston(**kwargs)  # cached per kwargs; every caller gets the same object, so do not mutate it
+
+@lru_cache(maxsize=None)  # call form is required: bare @lru_cache only works on Python 3.8+
+def _load_breast_cancer(**kwargs):  # memoized wrapper around sklearn's load_breast_cancer
+    return load_breast_cancer(**kwargs)  # cached per kwargs; every caller gets the same object, so do not mutate it
+
+@lru_cache(maxsize=None)  # call form is required: bare @lru_cache only works on Python 3.8+
+def _load_digits(**kwargs):  # memoized wrapper around sklearn's load_digits
+    return load_digits(**kwargs)  # one cache entry per distinct kwargs (e.g. n_class); shared object, do not mutate
+
+@lru_cache(maxsize=None)  # call form is required: bare @lru_cache only works on Python 3.8+
+def _load_iris(**kwargs):  # memoized wrapper around sklearn's load_iris
+    return load_iris(**kwargs)  # cached per kwargs; every caller gets the same object, so do not mutate it
+
+
 class TestEngine(unittest.TestCase):
     def test_binary(self):
-        X, y = load_breast_cancer(return_X_y=True)
+        X, y = _load_breast_cancer(return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         params = {
             'objective': 'binary',
@@ -75,7 +94,7 @@ def test_binary(self):
         self.assertAlmostEqual(evals_result['valid_0']['binary_logloss'][-1], ret, places=5)
 
     def test_rf(self):
-        X, y = load_breast_cancer(return_X_y=True)
+        X, y = _load_breast_cancer(return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         params = {
             'boosting_type': 'rf',
@@ -100,7 +119,7 @@ def test_rf(self):
         self.assertAlmostEqual(evals_result['valid_0']['binary_logloss'][-1], ret, places=5)
 
     def test_regression(self):
-        X, y = load_boston(return_X_y=True)
+        X, y = _load_boston(return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         params = {
             'metric': 'l2',
@@ -377,7 +396,7 @@ def test_categorical_non_zero_inputs(self):
         self.assertAlmostEqual(evals_result['valid_0']['auc'][-1], ret, places=5)
 
     def test_multiclass(self):
-        X, y = load_digits(n_class=10, return_X_y=True)
+        X, y = _load_digits(n_class=10, return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         params = {
             'objective': 'multiclass',
@@ -398,7 +417,7 @@ def test_multiclass(self):
         self.assertAlmostEqual(evals_result['valid_0']['multi_logloss'][-1], ret, places=5)
 
     def test_multiclass_rf(self):
-        X, y = load_digits(n_class=10, return_X_y=True)
+        X, y = _load_digits(n_class=10, return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         params = {
             'boosting_type': 'rf',
@@ -426,7 +445,7 @@ def test_multiclass_rf(self):
         self.assertAlmostEqual(evals_result['valid_0']['multi_logloss'][-1], ret, places=5)
 
     def test_multiclass_prediction_early_stopping(self):
-        X, y = load_digits(n_class=10, return_X_y=True)
+        X, y = _load_digits(n_class=10, return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         params = {
             'objective': 'multiclass',
@@ -452,7 +471,7 @@ def test_multiclass_prediction_early_stopping(self):
         self.assertLess(ret, 0.2)
 
     def test_multi_class_error(self):
-        X, y = load_digits(n_class=10, return_X_y=True)
+        X, y = _load_digits(n_class=10, return_X_y=True)
         params = {'objective': 'multiclass', 'num_classes': 10, 'metric': 'multi_error',
                   'num_leaves': 4, 'verbose': -1}
         lgb_data = lgb.Dataset(X, label=y)
@@ -497,7 +516,7 @@ def test_multi_class_error(self):
 
     def test_auc_mu(self):
         # should give same result as binary auc for 2 classes
-        X, y = load_digits(n_class=10, return_X_y=True)
+        X, y = _load_digits(n_class=10, return_X_y=True)
         y_new = np.zeros((len(y)))
         y_new[y != 0] = 1
         lgb_X = lgb.Dataset(X, label=y_new)
@@ -575,7 +594,7 @@ def test_auc_mu(self):
         self.assertNotEqual(results_weight['training']['auc_mu'][-1], results_no_weight['training']['auc_mu'][-1])
 
     def test_early_stopping(self):
-        X, y = load_breast_cancer(return_X_y=True)
+        X, y = _load_breast_cancer(return_X_y=True)
         params = {
             'objective': 'binary',
             'metric': 'binary_logloss',
@@ -607,7 +626,7 @@ def test_early_stopping(self):
         self.assertIn('binary_logloss', gbm.best_score[valid_set_name])
 
     def test_continue_train(self):
-        X, y = load_boston(return_X_y=True)
+        X, y = _load_boston(return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         params = {
             'objective': 'regression',
@@ -635,7 +654,7 @@ def test_continue_train(self):
         os.remove(model_name)
 
     def test_continue_train_reused_dataset(self):
-        X, y = load_boston(return_X_y=True)
+        X, y = _load_boston(return_X_y=True)
         params = {
             'objective': 'regression',
             'verbose': -1
@@ -648,7 +667,7 @@ def test_continue_train_reused_dataset(self):
         self.assertEqual(gbm.current_iteration(), 20)
 
     def test_continue_train_dart(self):
-        X, y = load_boston(return_X_y=True)
+        X, y = _load_boston(return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         params = {
             'boosting_type': 'dart',
@@ -671,7 +690,7 @@ def test_continue_train_dart(self):
         self.assertAlmostEqual(evals_result['valid_0']['l1'][-1], ret, places=5)
 
     def test_continue_train_multiclass(self):
-        X, y = load_iris(return_X_y=True)
+        X, y = _load_iris(return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         params = {
             'objective': 'multiclass',
@@ -694,7 +713,7 @@ def test_continue_train_multiclass(self):
         self.assertAlmostEqual(evals_result['valid_0']['multi_logloss'][-1], ret, places=5)
 
     def test_cv(self):
-        X_train, y_train = load_boston(return_X_y=True)
+        X_train, y_train = _load_boston(return_X_y=True)
         params = {'verbose': -1}
         lgb_train = lgb.Dataset(X_train, y_train)
         # shuffle = False, override metric in params
@@ -753,7 +772,7 @@ def test_cv(self):
         np.testing.assert_allclose(cv_res_lambda['ndcg@3-mean'], cv_res_lambda_obj['ndcg@3-mean'])
 
     def test_cvbooster(self):
-        X, y = load_breast_cancer(return_X_y=True)
+        X, y = _load_breast_cancer(return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         params = {
             'objective': 'binary',
@@ -797,7 +816,7 @@ def test_cvbooster(self):
         self.assertLess(ret, 0.15)
 
     def test_feature_name(self):
-        X_train, y_train = load_boston(return_X_y=True)
+        X_train, y_train = _load_boston(return_X_y=True)
         params = {'verbose': -1}
         lgb_train = lgb.Dataset(X_train, y_train)
         feature_names = ['f_' + str(i) for i in range(X_train.shape[-1])]
@@ -825,7 +844,7 @@ def test_feature_name_with_non_ascii(self):
 
     def test_save_load_copy_pickle(self):
         def train_and_predict(init_model=None, return_model=False):
-            X, y = load_boston(return_X_y=True)
+            X, y = _load_boston(return_X_y=True)
             X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
             params = {
                 'objective': 'regression',
@@ -989,7 +1008,7 @@ def test_reference_chain(self):
         self.assertEqual(len(evals_result['valid_1']['rmse']), 20)
 
     def test_contribs(self):
-        X, y = load_breast_cancer(return_X_y=True)
+        X, y = _load_breast_cancer(return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         params = {
             'objective': 'binary',
@@ -1372,7 +1391,7 @@ def test_small_max_bin(self):
         np.random.seed()  # reset seed
 
     def test_refit(self):
-        X, y = load_breast_cancer(return_X_y=True)
+        X, y = _load_breast_cancer(return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         params = {
             'objective': 'binary',
@@ -1388,7 +1407,7 @@ def test_refit(self):
         self.assertGreater(err_pred, new_err_pred)
 
     def test_mape_rf(self):
-        X, y = load_boston(return_X_y=True)
+        X, y = _load_boston(return_X_y=True)
         params = {
             'boosting_type': 'rf',
             'objective': 'mape',
@@ -1405,7 +1424,7 @@ def test_mape_rf(self):
         self.assertGreater(pred_mean, 20)
 
     def test_mape_dart(self):
-        X, y = load_boston(return_X_y=True)
+        X, y = _load_boston(return_X_y=True)
         params = {
             'boosting_type': 'dart',
             'objective': 'mape',
@@ -1484,7 +1503,7 @@ def preprocess_data(dtrain, dtest, params):
             params['num_class'] = 4
             return dtrain, dtest, params
 
-        X, y = load_iris(return_X_y=True)
+        X, y = _load_iris(return_X_y=True)
         dataset = lgb.Dataset(X, y, free_raw_data=False)
         params = {'objective': 'multiclass', 'num_class': 3, 'verbose': -1}
         results = lgb.cv(params, dataset, num_boost_round=10, fpreproc=preprocess_data)
@@ -1492,7 +1511,7 @@ def preprocess_data(dtrain, dtest, params):
         self.assertEqual(len(results['multi_logloss-mean']), 10)
 
     def test_metrics(self):
-        X, y = load_digits(n_class=2, return_X_y=True)
+        X, y = _load_digits(n_class=2, return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         lgb_train = lgb.Dataset(X_train, y_train, silent=True)
         lgb_valid = lgb.Dataset(X_test, y_test, reference=lgb_train, silent=True)
@@ -1798,7 +1817,7 @@ def train_booster(params=params_obj_verbose, **kwargs):
         self.assertEqual(len(evals_result), 1)
         self.assertIn('error', evals_result['valid_0'])
 
-        X, y = load_digits(n_class=3, return_X_y=True)
+        X, y = _load_digits(n_class=3, return_X_y=True)
         lgb_train = lgb.Dataset(X, y, silent=True)
 
         obj_multi_aliases = ['multiclass', 'softmax', 'multiclassova', 'multiclass_ova', 'ova', 'ovr']
@@ -1866,7 +1885,7 @@ def train_booster(params=params_obj_verbose, **kwargs):
                           params_class_3_verbose, metrics='binary_error', fobj=dummy_obj)
 
     def test_multiple_feval_train(self):
-        X, y = load_breast_cancer(return_X_y=True)
+        X, y = _load_breast_cancer(return_X_y=True)
 
         params = {'verbose': -1, 'objective': 'binary', 'metric': 'binary_logloss'}
 
@@ -1889,7 +1908,7 @@ def test_multiple_feval_train(self):
         self.assertIn('decreasing_metric', evals_result['valid_0'])
 
     def test_multiple_feval_cv(self):
-        X, y = load_breast_cancer(return_X_y=True)
+        X, y = _load_breast_cancer(return_X_y=True)
 
         params = {'verbose': -1, 'objective': 'binary', 'metric': 'binary_logloss'}
 
@@ -1912,7 +1931,7 @@ def test_multiple_feval_cv(self):
 
     @unittest.skipIf(psutil.virtual_memory().available / 1024 / 1024 / 1024 < 3, 'not enough RAM')
     def test_model_size(self):
-        X, y = load_boston(return_X_y=True)
+        X, y = _load_boston(return_X_y=True)
         data = lgb.Dataset(X, y)
         bst = lgb.train({'verbose': -1}, data, num_boost_round=2)
         y_pred = bst.predict(X)
@@ -1938,7 +1957,7 @@ def test_model_size(self):
             self.skipTest('not enough RAM')
 
     def test_get_split_value_histogram(self):
-        X, y = load_boston(return_X_y=True)
+        X, y = _load_boston(return_X_y=True)
         lgb_train = lgb.Dataset(X, y, categorical_feature=[2])
         gbm = lgb.train({'verbose': -1}, lgb_train, num_boost_round=20)
         # test XGBoost-style return value
@@ -2048,7 +2067,7 @@ def metrics_combination_cv_regression(metric_list, assumed_iteration,
                          eval_train_metric=eval_train_metric)
             self.assertEqual(assumed_iteration, len(ret[list(ret.keys())[0]]))
 
-        X, y = load_boston(return_X_y=True)
+        X, y = _load_boston(return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
         X_test1, X_test2, y_test1, y_test2 = train_test_split(X_test, y_test, test_size=0.5, random_state=73)
         lgb_train = lgb.Dataset(X_train, y_train)
@@ -2126,7 +2145,7 @@ def metrics_combination_cv_regression(metric_list, assumed_iteration,
                                                                            decreasing_metric(preds, train_data)])
 
     def test_node_level_subcol(self):
-        X, y = load_breast_cancer(return_X_y=True)
+        X, y = _load_breast_cancer(return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         params = {
             'objective': 'binary',
@@ -2307,7 +2326,7 @@ def test_dataset_params_with_reference(self):
 
     def test_extra_trees(self):
         # check extra trees increases regularization
-        X, y = load_boston(return_X_y=True)
+        X, y = _load_boston(return_X_y=True)
         lgb_x = lgb.Dataset(X, label=y)
         params = {'objective': 'regression',
                   'num_leaves': 32,
@@ -2325,7 +2344,7 @@ def test_extra_trees(self):
 
     def test_path_smoothing(self):
         # check path smoothing increases regularization
-        X, y = load_boston(return_X_y=True)
+        X, y = _load_boston(return_X_y=True)
         lgb_x = lgb.Dataset(X, label=y)
         params = {'objective': 'regression',
                   'num_leaves': 32,
@@ -2347,7 +2366,7 @@ def _imptcs_to_numpy(X, impcts_dict):
             cols = ['Column_' + str(i) for i in range(X.shape[1])]
             return [impcts_dict.get(col, 0.) for col in cols]
 
-        X, y = load_breast_cancer(return_X_y=True)
+        X, y = _load_breast_cancer(return_X_y=True)
         data = lgb.Dataset(X, label=y)
         num_trees = 10
         bst = lgb.train({"objective": "binary", "verbose": -1}, data, num_trees)
@@ -2392,7 +2411,7 @@ def _imptcs_to_numpy(X, impcts_dict):
             self.assertIsNone(tree_df.loc[0, col])
 
     def test_interaction_constraints(self):
-        X, y = load_boston(return_X_y=True)
+        X, y = _load_boston(return_X_y=True)
         num_features = X.shape[1]
         train_data = lgb.Dataset(X, label=y)
         # check that constraint containing all features is equivalent to no constraint
@@ -2469,7 +2488,7 @@ def inner_test(X, y, params, early_stopping_rounds):
             np.testing.assert_allclose(pred4, pred6)
 
         # test for regression
-        X, y = load_boston(return_X_y=True)
+        X, y = _load_boston(return_X_y=True)
         params = {
             'objective': 'regression',
             'verbose': -1,
@@ -2482,7 +2501,7 @@ def inner_test(X, y, params, early_stopping_rounds):
         inner_test(X, y, params, early_stopping_rounds=None)
 
         # test for multi-class
-        X, y = load_iris(return_X_y=True)
+        X, y = _load_iris(return_X_y=True)
         params = {
             'objective': 'multiclass',
             'metric': 'multi_logloss',
@@ -2496,7 +2515,7 @@ def inner_test(X, y, params, early_stopping_rounds):
         inner_test(X, y, params, early_stopping_rounds=None)
 
         # test for binary
-        X, y = load_breast_cancer(return_X_y=True)
+        X, y = _load_breast_cancer(return_X_y=True)
         params = {
             'objective': 'binary',
             'metric': 'binary_logloss',
diff --git a/tests/python_package_test/test_sklearn.py b/tests/python_package_test/test_sklearn.py
index cb2f42c21ec6..eabf8b1e3f22 100644
--- a/tests/python_package_test/test_sklearn.py
+++ b/tests/python_package_test/test_sklearn.py
@@ -6,6 +6,8 @@
 import unittest
 import warnings
 
+from functools import lru_cache
+
 import lightgbm as lgb
 import numpy as np
 from sklearn import __version__ as sk_version
@@ -74,10 +76,31 @@ def multi_logloss(y_true, y_pred):
     return np.mean([-math.log(y_pred[i][y]) for i, y in enumerate(y_true)])
 
 
+@lru_cache(maxsize=None)  # call form is required: bare @lru_cache only works on Python 3.8+
+def _load_boston(**kwargs):  # memoized wrapper around sklearn's load_boston
+    return load_boston(**kwargs)  # cached per kwargs; every caller gets the same object, so do not mutate it
+
+@lru_cache(maxsize=None)  # call form is required: bare @lru_cache only works on Python 3.8+
+def _load_breast_cancer(**kwargs):  # memoized wrapper around sklearn's load_breast_cancer
+    return load_breast_cancer(**kwargs)  # cached per kwargs; every caller gets the same object, so do not mutate it
+
+@lru_cache(maxsize=None)  # call form is required: bare @lru_cache only works on Python 3.8+
+def _load_digits(**kwargs):  # memoized wrapper around sklearn's load_digits
+    return load_digits(**kwargs)  # one cache entry per distinct kwargs (e.g. n_class); shared object, do not mutate
+
+@lru_cache(maxsize=None)  # call form is required: bare @lru_cache only works on Python 3.8+
+def _load_iris(**kwargs):  # memoized wrapper around sklearn's load_iris
+    return load_iris(**kwargs)  # cached per kwargs; every caller gets the same object, so do not mutate it
+
+@lru_cache(maxsize=None)  # call form is required: bare @lru_cache only works on Python 3.8+
+def _load_linnerud(**kwargs):  # memoized wrapper around sklearn's load_linnerud
+    return load_linnerud(**kwargs)  # cached per kwargs; the returned Bunch is shared by all callers, do not mutate it
+
+
 class TestSklearn(unittest.TestCase):
 
     def test_binary(self):
-        X, y = load_breast_cancer(return_X_y=True)
+        X, y = _load_breast_cancer(return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         gbm = lgb.LGBMClassifier(n_estimators=50, silent=True)
         gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False)
@@ -86,7 +109,7 @@ def test_binary(self):
         self.assertAlmostEqual(ret, gbm.evals_result_['valid_0']['binary_logloss'][gbm.best_iteration_ - 1], places=5)
 
     def test_regression(self):
-        X, y = load_boston(return_X_y=True)
+        X, y = _load_boston(return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         gbm = lgb.LGBMRegressor(n_estimators=50, silent=True)
         gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False)
@@ -95,7 +118,7 @@ def test_regression(self):
         self.assertAlmostEqual(ret, gbm.evals_result_['valid_0']['l2'][gbm.best_iteration_ - 1], places=5)
 
     def test_multiclass(self):
-        X, y = load_digits(n_class=10, return_X_y=True)
+        X, y = _load_digits(n_class=10, return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         gbm = lgb.LGBMClassifier(n_estimators=50, silent=True)
         gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False)
@@ -138,7 +161,7 @@ def test_xendcg(self):
         self.assertGreater(gbm.best_score_['valid_0']['ndcg@3'], 0.6253)
 
     def test_regression_with_custom_objective(self):
-        X, y = load_boston(return_X_y=True)
+        X, y = _load_boston(return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         gbm = lgb.LGBMRegressor(n_estimators=50, silent=True, objective=objective_ls)
         gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False)
@@ -147,7 +170,7 @@ def test_regression_with_custom_objective(self):
         self.assertAlmostEqual(ret, gbm.evals_result_['valid_0']['l2'][gbm.best_iteration_ - 1], places=5)
 
     def test_binary_classification_with_custom_objective(self):
-        X, y = load_digits(n_class=2, return_X_y=True)
+        X, y = _load_digits(n_class=2, return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         gbm = lgb.LGBMClassifier(n_estimators=50, silent=True, objective=logregobj)
         gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False)
@@ -159,7 +182,7 @@ def test_binary_classification_with_custom_objective(self):
         self.assertLess(ret, 0.05)
 
     def test_dart(self):
-        X, y = load_boston(return_X_y=True)
+        X, y = _load_boston(return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         gbm = lgb.LGBMRegressor(boosting_type='dart', n_estimators=50)
         gbm.fit(X_train, y_train)
@@ -172,7 +195,7 @@ def test_dart(self):
     def test_stacking_classifier(self):
         from sklearn.ensemble import StackingClassifier
 
-        X, y = load_iris(return_X_y=True)
+        X, y = _load_iris(return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
         classifiers = [('gbm1', lgb.LGBMClassifier(n_estimators=3)),
                        ('gbm2', lgb.LGBMClassifier(n_estimators=3))]
@@ -199,7 +222,7 @@ def test_stacking_classifier(self):
     def test_stacking_regressor(self):
         from sklearn.ensemble import StackingRegressor
 
-        X, y = load_boston(return_X_y=True)
+        X, y = _load_boston(return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
         regressors = [('gbm1', lgb.LGBMRegressor(n_estimators=3)),
                       ('gbm2', lgb.LGBMRegressor(n_estimators=3))]
@@ -218,7 +241,7 @@ def test_stacking_regressor(self):
         self.assertEqual(len(reg.final_estimator_.feature_importances_), 15)
 
     def test_grid_search(self):
-        X, y = load_iris(return_X_y=True)
+        X, y = _load_iris(return_X_y=True)
         y = y.astype(str)  # utilize label encoder at it's max power
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1,
                                                             random_state=42)
@@ -248,7 +271,7 @@ def test_grid_search(self):
         self.assertLessEqual(score, 1.)
 
     def test_random_search(self):
-        X, y = load_iris(return_X_y=True)
+        X, y = _load_iris(return_X_y=True)
         y = y.astype(str)  # utilize label encoder at it's max power
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1,
                                                             random_state=42)
@@ -302,7 +325,7 @@ def test_multioutput_classifier(self):
     # sklearn < 0.23 does not have as_frame parameter
     @unittest.skipIf(sk_version < '0.23.0', 'scikit-learn version is less than 0.23')
     def test_multioutput_regressor(self):
-        bunch = load_linnerud(as_frame=True)  # returns a Bunch instance
+        bunch = _load_linnerud(as_frame=True)  # returns a Bunch instance
         X, y = bunch['data'], bunch['target']
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1,
                                                             random_state=42)
@@ -341,7 +364,7 @@ def test_classifier_chain(self):
     # sklearn < 0.23 does not have as_frame parameter
     @unittest.skipIf(sk_version < '0.23.0', 'scikit-learn version is less than 0.23')
     def test_regressor_chain(self):
-        bunch = load_linnerud(as_frame=True)  # returns a Bunch instance
+        bunch = _load_linnerud(as_frame=True)  # returns a Bunch instance
         X, y = bunch['data'], bunch['target']
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         order = [2, 0, 1]
@@ -358,7 +381,7 @@ def test_regressor_chain(self):
             self.assertIsInstance(regressor.booster_, lgb.Booster)
 
     def test_clone_and_property(self):
-        X, y = load_boston(return_X_y=True)
+        X, y = _load_boston(return_X_y=True)
         gbm = lgb.LGBMRegressor(n_estimators=10, silent=True)
         gbm.fit(X, y, verbose=False)
 
@@ -366,7 +389,7 @@ def test_clone_and_property(self):
         self.assertIsInstance(gbm.booster_, lgb.Booster)
         self.assertIsInstance(gbm.feature_importances_, np.ndarray)
 
-        X, y = load_digits(n_class=2, return_X_y=True)
+        X, y = _load_digits(n_class=2, return_X_y=True)
         clf = lgb.LGBMClassifier(n_estimators=10, silent=True)
         clf.fit(X, y, verbose=False)
         self.assertListEqual(sorted(clf.classes_), [0, 1])
@@ -375,7 +398,7 @@ def test_clone_and_property(self):
         self.assertIsInstance(clf.feature_importances_, np.ndarray)
 
     def test_joblib(self):
-        X, y = load_boston(return_X_y=True)
+        X, y = _load_boston(return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         gbm = lgb.LGBMRegressor(n_estimators=10, objective=custom_asymmetric_obj,
                                 silent=True, importance_type='split')
@@ -400,7 +423,7 @@ def test_joblib(self):
         np.testing.assert_allclose(pred_origin, pred_pickle)
 
     def test_random_state_object(self):
-        X, y = load_iris(return_X_y=True)
+        X, y = _load_iris(return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         state1 = np.random.RandomState(123)
         state2 = np.random.RandomState(123)
@@ -433,14 +456,14 @@ def test_random_state_object(self):
                           df1, df3)
 
     def test_feature_importances_single_leaf(self):
-        data = load_iris(return_X_y=False)
+        data = _load_iris(return_X_y=False)
         clf = lgb.LGBMClassifier(n_estimators=10)
         clf.fit(data.data, data.target)
         importances = clf.feature_importances_
         self.assertEqual(len(importances), 4)
 
     def test_feature_importances_type(self):
-        data = load_iris(return_X_y=False)
+        data = _load_iris(return_X_y=False)
         clf = lgb.LGBMClassifier(n_estimators=10)
         clf.fit(data.data, data.target)
         clf.set_params(importance_type='split')
@@ -564,7 +587,7 @@ def test_pandas_sparse(self):
 
     def test_predict(self):
         # With default params
-        iris = load_iris(return_X_y=False)
+        iris = _load_iris(return_X_y=False)
         X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target,
                                                             test_size=0.2, random_state=42)
 
@@ -644,7 +667,7 @@ def test_predict(self):
                           res_engine, res_sklearn_params)
 
     def test_evaluate_train_set(self):
-        X, y = load_boston(return_X_y=True)
+        X, y = _load_boston(return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         gbm = lgb.LGBMRegressor(n_estimators=10, silent=True)
         gbm.fit(X_train, y_train, eval_set=[(X_train, y_train), (X_test, y_test)], verbose=False)
@@ -657,7 +680,7 @@ def test_evaluate_train_set(self):
         self.assertIn('l2', gbm.evals_result_['valid_1'])
 
     def test_metrics(self):
-        X, y = load_boston(return_X_y=True)
+        X, y = _load_boston(return_X_y=True)
         params = {'n_estimators': 2, 'verbose': -1}
         params_fit = {'X': X, 'y': y, 'eval_set': (X, y), 'verbose': False}
 
@@ -697,7 +720,7 @@ def test_metrics(self):
         self.assertIn('mape', gbm.evals_result_['training'])
 
         # non-default metric with multiple metrics in eval_metric for LGBMClassifier
-        X_classification, y_classification = load_breast_cancer(return_X_y=True)
+        X_classification, y_classification = _load_breast_cancer(return_X_y=True)
         params_classification = {'n_estimators': 2, 'verbose': -1,
                                  'objective': 'binary', 'metric': 'binary_logloss'}
         params_fit_classification = {'X': X_classification, 'y': y_classification,
@@ -880,7 +903,7 @@ def test_metrics(self):
         self.assertIn('mape', gbm.evals_result_['training'])
         self.assertIn('error', gbm.evals_result_['training'])
 
-        X, y = load_digits(n_class=3, return_X_y=True)
+        X, y = _load_digits(n_class=3, return_X_y=True)
         params_fit = {'X': X, 'y': y, 'eval_set': (X, y), 'verbose': False}
 
         # default metric and invalid binary metric is replaced with multiclass alternative
@@ -907,7 +930,7 @@ def test_metrics(self):
         self.assertIn('multi_logloss', gbm.evals_result_['training'])
         self.assertIn('multi_error', gbm.evals_result_['training'])
 
-        X, y = load_digits(n_class=2, return_X_y=True)
+        X, y = _load_digits(n_class=2, return_X_y=True)
         params_fit = {'X': X, 'y': y, 'eval_set': (X, y), 'verbose': False}
 
         # default metric and invalid multiclass metric is replaced with binary alternative
@@ -924,7 +947,7 @@ def test_metrics(self):
 
     def test_multiple_eval_metrics(self):
 
-        X, y = load_breast_cancer(return_X_y=True)
+        X, y = _load_breast_cancer(return_X_y=True)
 
         params = {'n_estimators': 2, 'verbose': -1, 'objective': 'binary', 'metric': 'binary_logloss'}
         params_fit = {'X': X, 'y': y, 'eval_set': (X, y), 'verbose': False}
@@ -1003,7 +1026,7 @@ def fit_and_check(eval_set_names, metric_names, assumed_iteration, first_metric_
                     self.assertEqual(assumed_iteration if eval_set_name != 'training' else gbm.n_estimators,
                                      gbm.best_iteration_)
 
-        X, y = load_boston(return_X_y=True)
+        X, y = _load_boston(return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
         X_test1, X_test2, y_test1, y_test2 = train_test_split(X_test, y_test, test_size=0.5, random_state=72)
         params = {'n_estimators': 30,
@@ -1084,7 +1107,7 @@ def fit_and_check(eval_set_names, metric_names, assumed_iteration, first_metric_
         fit_and_check(['valid_0', 'valid_1'], ['l1', 'l2'], iter_min_l2, True)
 
     def test_class_weight(self):
-        X, y = load_digits(n_class=10, return_X_y=True)
+        X, y = _load_digits(n_class=10, return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
         y_train_str = y_train.astype('str')
         y_test_str = y_test.astype('str')
@@ -1118,7 +1141,7 @@ def test_class_weight(self):
                                            gbm_str.evals_result_[eval_set][metric])
 
     def test_continue_training_with_model(self):
-        X, y = load_digits(n_class=3, return_X_y=True)
+        X, y = _load_digits(n_class=3, return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         init_gbm = lgb.LGBMClassifier(n_estimators=5).fit(X_train, y_train, eval_set=(X_test, y_test),
                                                           verbose=False)
@@ -1133,7 +1156,7 @@ def test_continue_training_with_model(self):
     # sklearn < 0.22 requires passing "attributes" argument
     @unittest.skipIf(sk_version < '0.22.0', 'scikit-learn version is less than 0.22')
     def test_check_is_fitted(self):
-        X, y = load_digits(n_class=2, return_X_y=True)
+        X, y = _load_digits(n_class=2, return_X_y=True)
         est = lgb.LGBMModel(n_estimators=5, objective="binary")
         clf = lgb.LGBMClassifier(n_estimators=5)
         reg = lgb.LGBMRegressor(n_estimators=5)

From f792015960b1b7b30a48df14d849179f644ac916 Mon Sep 17 00:00:00 2001
From: James Lamb <jaylamb20@gmail.com>
Date: Mon, 26 Oct 2020 00:06:41 -0500
Subject: [PATCH 02/14] add profiling files to gitignore

---
 .gitignore | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.gitignore b/.gitignore
index 6e4ffb35670a..f2f2c6c2316f 100644
--- a/.gitignore
+++ b/.gitignore
@@ -318,6 +318,8 @@ htmlcov/
 .coverage.*
 .cache
 nosetests.xml
+prof/
+*.prof
 coverage.xml
 *,cover
 .hypothesis/

From c71a30f7481e05a1c25c6d90872914beef66edee Mon Sep 17 00:00:00 2001
From: James Lamb <jaylamb20@gmail.com>
Date: Mon, 26 Oct 2020 12:29:36 -0500
Subject: [PATCH 03/14] use functools.cache instead of bare lru_cache

---
 tests/python_package_test/test_basic.py   |  4 ++--
 tests/python_package_test/test_engine.py  | 10 +++++-----
 tests/python_package_test/test_sklearn.py | 12 ++++++------
 3 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/tests/python_package_test/test_basic.py b/tests/python_package_test/test_basic.py
index d68651d3b236..633934268f90 100644
--- a/tests/python_package_test/test_basic.py
+++ b/tests/python_package_test/test_basic.py
@@ -3,7 +3,7 @@
 import tempfile
 import unittest
 
-from functools import lru_cache
+from functools import cache
 
 import lightgbm as lgb
 import numpy as np
@@ -13,7 +13,7 @@
 from sklearn.model_selection import train_test_split
 
 
-@lru_cache
+@cache
 def _load_breast_cancer(**kwargs):
     return load_breast_cancer(**kwargs)
 
diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index fa069e830d69..096246a753bc 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -7,7 +7,7 @@
 import random
 import unittest
 
-from functools import lru_cache
+from functools import cache
 
 import lightgbm as lgb
 import numpy as np
@@ -53,19 +53,19 @@ def categorize(continuous_x):
     return np.digitize(continuous_x, bins=np.arange(0, 1, 0.01))
 
 
-@lru_cache
+@cache
 def _load_boston(**kwargs):
     return load_boston(**kwargs)
 
-@lru_cache
+@cache
 def _load_breast_cancer(**kwargs):
     return load_breast_cancer(**kwargs)
 
-@lru_cache
+@cache
 def _load_digits(**kwargs):
     return load_digits(**kwargs)
 
-@lru_cache
+@cache
 def _load_iris(**kwargs):
     return load_iris(**kwargs)
 
diff --git a/tests/python_package_test/test_sklearn.py b/tests/python_package_test/test_sklearn.py
index eabf8b1e3f22..51ff70cfbf50 100644
--- a/tests/python_package_test/test_sklearn.py
+++ b/tests/python_package_test/test_sklearn.py
@@ -6,7 +6,7 @@
 import unittest
 import warnings
 
-from functools import lru_cache
+from functools import cache
 
 import lightgbm as lgb
 import numpy as np
@@ -76,23 +76,23 @@ def multi_logloss(y_true, y_pred):
     return np.mean([-math.log(y_pred[i][y]) for i, y in enumerate(y_true)])
 
 
-@lru_cache
+@cache
 def _load_boston(**kwargs):
     return load_boston(**kwargs)
 
-@lru_cache
+@cache
 def _load_breast_cancer(**kwargs):
     return load_breast_cancer(**kwargs)
 
-@lru_cache
+@cache
 def _load_digits(**kwargs):
     return load_digits(**kwargs)
 
-@lru_cache
+@cache
 def _load_iris(**kwargs):
     return load_iris(**kwargs)
 
-@lru_cache
+@cache
 def _load_linnerud(**kwargs):
     return load_linnerud(**kwargs)
 

From 979b76a0078472dccdfbe15dc4e73078e798ffa2 Mon Sep 17 00:00:00 2001
From: James Lamb <jaylamb20@gmail.com>
Date: Mon, 26 Oct 2020 12:38:18 -0500
Subject: [PATCH 04/14] use lru_cache(maxsize=None) instead of functools.cache

---
 tests/python_package_test/test_basic.py   |  4 ++--
 tests/python_package_test/test_engine.py  | 10 +++++-----
 tests/python_package_test/test_sklearn.py | 12 ++++++------
 3 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/tests/python_package_test/test_basic.py b/tests/python_package_test/test_basic.py
index 633934268f90..62fd704a8de0 100644
--- a/tests/python_package_test/test_basic.py
+++ b/tests/python_package_test/test_basic.py
@@ -3,7 +3,7 @@
 import tempfile
 import unittest
 
-from functools import cache
+from functools import lru_cache
 
 import lightgbm as lgb
 import numpy as np
@@ -13,7 +13,7 @@
 from sklearn.model_selection import train_test_split
 
 
-@cache
+@lru_cache(maxsize=None)
 def _load_breast_cancer(**kwargs):
     return load_breast_cancer(**kwargs)
 
diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 096246a753bc..1669fd2bfb3d 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -7,7 +7,7 @@
 import random
 import unittest
 
-from functools import cache
+from functools import lru_cache
 
 import lightgbm as lgb
 import numpy as np
@@ -53,19 +53,19 @@ def categorize(continuous_x):
     return np.digitize(continuous_x, bins=np.arange(0, 1, 0.01))
 
 
-@cache
+@lru_cache(maxsize=None)
 def _load_boston(**kwargs):
     return load_boston(**kwargs)
 
-@cache
+@lru_cache(maxsize=None)
 def _load_breast_cancer(**kwargs):
     return load_breast_cancer(**kwargs)
 
-@cache
+@lru_cache(maxsize=None)
 def _load_digits(**kwargs):
     return load_digits(**kwargs)
 
-@cache
+@lru_cache(maxsize=None)
 def _load_iris(**kwargs):
     return load_iris(**kwargs)
 
diff --git a/tests/python_package_test/test_sklearn.py b/tests/python_package_test/test_sklearn.py
index 51ff70cfbf50..68c65253c02d 100644
--- a/tests/python_package_test/test_sklearn.py
+++ b/tests/python_package_test/test_sklearn.py
@@ -6,7 +6,7 @@
 import unittest
 import warnings
 
-from functools import cache
+from functools import lru_cache
 
 import lightgbm as lgb
 import numpy as np
@@ -76,23 +76,23 @@ def multi_logloss(y_true, y_pred):
     return np.mean([-math.log(y_pred[i][y]) for i, y in enumerate(y_true)])
 
 
-@cache
+@lru_cache(maxsize=None)
 def _load_boston(**kwargs):
     return load_boston(**kwargs)
 
-@cache
+@lru_cache(maxsize=None)
 def _load_breast_cancer(**kwargs):
     return load_breast_cancer(**kwargs)
 
-@cache
+@lru_cache(maxsize=None)
 def _load_digits(**kwargs):
     return load_digits(**kwargs)
 
-@cache
+@lru_cache(maxsize=None)
 def _load_iris(**kwargs):
     return load_iris(**kwargs)
 
-@cache
+@lru_cache(maxsize=None)
 def _load_linnerud(**kwargs):
     return load_linnerud(**kwargs)
 

From a86a6c22287977d9654f9cd38a028d746f996a91 Mon Sep 17 00:00:00 2001
From: James Lamb <jaylamb20@gmail.com>
Date: Mon, 26 Oct 2020 16:32:15 -0500
Subject: [PATCH 05/14] patch lru_cache on Python 2.7

---
 tests/python_package_test/test_basic.py   | 15 +++++++++++++--
 tests/python_package_test/test_engine.py  | 15 +++++++++++++--
 tests/python_package_test/test_sklearn.py | 15 +++++++++++++--
 3 files changed, 39 insertions(+), 6 deletions(-)

diff --git a/tests/python_package_test/test_basic.py b/tests/python_package_test/test_basic.py
index 62fd704a8de0..6dd2658b2c30 100644
--- a/tests/python_package_test/test_basic.py
+++ b/tests/python_package_test/test_basic.py
@@ -3,8 +3,6 @@
 import tempfile
 import unittest
 
-from functools import lru_cache
-
 import lightgbm as lgb
 import numpy as np
 
@@ -12,6 +10,19 @@
 from sklearn.datasets import load_breast_cancer, dump_svmlight_file, load_svmlight_file
 from sklearn.model_selection import train_test_split
 
+try:
+    from functools import lru_cache
+except ImportError:
+    warnings.warn("Could not import functools.lru_cache", RuntimeWarning)
+    def lru_cache(user_function, maxsize=None):
+        @wraps(user_function)
+        def wrapper(*args, **kwargs):
+            arg_key = tuple(args, [item for item in kwargs.items()])
+            if arg_key not in cache:
+                cache[arg_key] = user_function(*args)
+            return cache[arg_key]
+        return wrapper
+
 
 @lru_cache(maxsize=None)
 def _load_breast_cancer(**kwargs):
diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 1669fd2bfb3d..d76b9ac0a58c 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -7,8 +7,6 @@
 import random
 import unittest
 
-from functools import lru_cache
-
 import lightgbm as lgb
 import numpy as np
 from scipy.sparse import csr_matrix, isspmatrix_csr, isspmatrix_csc
@@ -22,6 +20,19 @@
 except ImportError:
     import pickle
 
+try:
+    from functools import lru_cache
+except ImportError:
+    warnings.warn("Could not import functools.lru_cache", RuntimeWarning)
+    def lru_cache(user_function, maxsize=None):
+        @wraps(user_function)
+        def wrapper(*args, **kwargs):
+            arg_key = tuple(args, [item for item in kwargs.items()])
+            if arg_key not in cache:
+                cache[arg_key] = user_function(*args)
+            return cache[arg_key]
+        return wrapper
+
 
 decreasing_generator = itertools.count(0, -1)
 
diff --git a/tests/python_package_test/test_sklearn.py b/tests/python_package_test/test_sklearn.py
index 68c65253c02d..88ae92002274 100644
--- a/tests/python_package_test/test_sklearn.py
+++ b/tests/python_package_test/test_sklearn.py
@@ -6,8 +6,6 @@
 import unittest
 import warnings
 
-from functools import lru_cache
-
 import lightgbm as lgb
 import numpy as np
 from sklearn import __version__ as sk_version
@@ -24,6 +22,19 @@
                                             check_parameters_default_constructible)
 from sklearn.utils.validation import check_is_fitted
 
+try:
+    from functools import lru_cache
+except ImportError:
+    warnings.warn("Could not import functools.lru_cache", RuntimeWarning)
+    def lru_cache(user_function, maxsize=None):
+        @wraps(user_function)
+        def wrapper(*args, **kwargs):
+            arg_key = tuple(args, [item for item in kwargs.items()])
+            if arg_key not in cache:
+                cache[arg_key] = user_function(*args)
+            return cache[arg_key]
+        return wrapper
+
 
 decreasing_generator = itertools.count(0, -1)
 

From afff7d3638c94c9cc7ecfa2bd9db292142191f99 Mon Sep 17 00:00:00 2001
From: James Lamb <jaylamb20@gmail.com>
Date: Mon, 26 Oct 2020 16:37:07 -0500
Subject: [PATCH 06/14] linting

---
 tests/python_package_test/test_basic.py   | 1 +
 tests/python_package_test/test_engine.py  | 4 ++++
 tests/python_package_test/test_sklearn.py | 5 +++++
 3 files changed, 10 insertions(+)

diff --git a/tests/python_package_test/test_basic.py b/tests/python_package_test/test_basic.py
index 6dd2658b2c30..24ac8ccded1a 100644
--- a/tests/python_package_test/test_basic.py
+++ b/tests/python_package_test/test_basic.py
@@ -14,6 +14,7 @@
     from functools import lru_cache
 except ImportError:
     warnings.warn("Could not import functools.lru_cache", RuntimeWarning)
+
     def lru_cache(user_function, maxsize=None):
         @wraps(user_function)
         def wrapper(*args, **kwargs):
diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index d76b9ac0a58c..42241292d43a 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -24,6 +24,7 @@
     from functools import lru_cache
 except ImportError:
     warnings.warn("Could not import functools.lru_cache", RuntimeWarning)
+
     def lru_cache(user_function, maxsize=None):
         @wraps(user_function)
         def wrapper(*args, **kwargs):
@@ -68,14 +69,17 @@ def categorize(continuous_x):
 def _load_boston(**kwargs):
     return load_boston(**kwargs)
 
+
 @lru_cache(maxsize=None)
 def _load_breast_cancer(**kwargs):
     return load_breast_cancer(**kwargs)
 
+
 @lru_cache(maxsize=None)
 def _load_digits(**kwargs):
     return load_digits(**kwargs)
 
+
 @lru_cache(maxsize=None)
 def _load_iris(**kwargs):
     return load_iris(**kwargs)
diff --git a/tests/python_package_test/test_sklearn.py b/tests/python_package_test/test_sklearn.py
index 88ae92002274..d9a3918950c7 100644
--- a/tests/python_package_test/test_sklearn.py
+++ b/tests/python_package_test/test_sklearn.py
@@ -26,6 +26,7 @@
     from functools import lru_cache
 except ImportError:
     warnings.warn("Could not import functools.lru_cache", RuntimeWarning)
+
     def lru_cache(user_function, maxsize=None):
         @wraps(user_function)
         def wrapper(*args, **kwargs):
@@ -91,18 +92,22 @@ def multi_logloss(y_true, y_pred):
 def _load_boston(**kwargs):
     return load_boston(**kwargs)
 
+
 @lru_cache(maxsize=None)
 def _load_breast_cancer(**kwargs):
     return load_breast_cancer(**kwargs)
 
+
 @lru_cache(maxsize=None)
 def _load_digits(**kwargs):
     return load_digits(**kwargs)
 
+
 @lru_cache(maxsize=None)
 def _load_iris(**kwargs):
     return load_iris(**kwargs)
 
+
 @lru_cache(maxsize=None)
 def _load_linnerud(**kwargs):
     return load_linnerud(**kwargs)

From 25a9bc7736010eb4278583c576dc761599c0040d Mon Sep 17 00:00:00 2001
From: James Lamb <jaylamb20@gmail.com>
Date: Mon, 26 Oct 2020 22:38:42 -0500
Subject: [PATCH 07/14] move lru_cache fallback into shared utils module

---
 tests/python_package_test/__init__.py     |  0
 tests/python_package_test/test_basic.py   | 14 +-------------
 tests/python_package_test/test_engine.py  | 14 +-------------
 tests/python_package_test/test_sklearn.py | 14 +-------------
 tests/python_package_test/utils.py        | 14 ++++++++++++++
 5 files changed, 17 insertions(+), 39 deletions(-)
 create mode 100644 tests/python_package_test/__init__.py
 create mode 100644 tests/python_package_test/utils.py

diff --git a/tests/python_package_test/__init__.py b/tests/python_package_test/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
diff --git a/tests/python_package_test/test_basic.py b/tests/python_package_test/test_basic.py
index 1c0accd8a356..19a4debbb914 100644
--- a/tests/python_package_test/test_basic.py
+++ b/tests/python_package_test/test_basic.py
@@ -10,19 +10,7 @@
 from sklearn.datasets import load_breast_cancer, dump_svmlight_file, load_svmlight_file
 from sklearn.model_selection import train_test_split
 
-try:
-    from functools import lru_cache
-except ImportError:
-    warnings.warn("Could not import functools.lru_cache", RuntimeWarning)
-
-    def lru_cache(user_function, maxsize=None):
-        @wraps(user_function)
-        def wrapper(*args, **kwargs):
-            arg_key = tuple(args, [item for item in kwargs.items()])
-            if arg_key not in cache:
-                cache[arg_key] = user_function(*args)
-            return cache[arg_key]
-        return wrapper
+from .utils import lru_cache
 
 
 @lru_cache(maxsize=None)
diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 42241292d43a..67696174a711 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -20,19 +20,7 @@
 except ImportError:
     import pickle
 
-try:
-    from functools import lru_cache
-except ImportError:
-    warnings.warn("Could not import functools.lru_cache", RuntimeWarning)
-
-    def lru_cache(user_function, maxsize=None):
-        @wraps(user_function)
-        def wrapper(*args, **kwargs):
-            arg_key = tuple(args, [item for item in kwargs.items()])
-            if arg_key not in cache:
-                cache[arg_key] = user_function(*args)
-            return cache[arg_key]
-        return wrapper
+from .utils import lru_cache
 
 
 decreasing_generator = itertools.count(0, -1)
diff --git a/tests/python_package_test/test_sklearn.py b/tests/python_package_test/test_sklearn.py
index d9a3918950c7..498009620bf3 100644
--- a/tests/python_package_test/test_sklearn.py
+++ b/tests/python_package_test/test_sklearn.py
@@ -22,19 +22,7 @@
                                             check_parameters_default_constructible)
 from sklearn.utils.validation import check_is_fitted
 
-try:
-    from functools import lru_cache
-except ImportError:
-    warnings.warn("Could not import functools.lru_cache", RuntimeWarning)
-
-    def lru_cache(user_function, maxsize=None):
-        @wraps(user_function)
-        def wrapper(*args, **kwargs):
-            arg_key = tuple(args, [item for item in kwargs.items()])
-            if arg_key not in cache:
-                cache[arg_key] = user_function(*args)
-            return cache[arg_key]
-        return wrapper
+from .utils import lru_cache
 
 
 decreasing_generator = itertools.count(0, -1)
diff --git a/tests/python_package_test/utils.py b/tests/python_package_test/utils.py
new file mode 100644
index 000000000000..428b630e41e3
--- /dev/null
+++ b/tests/python_package_test/utils.py
@@ -0,0 +1,14 @@
+
+try:
+    from functools import lru_cache
+except ImportError:
+    warnings.warn("Could not import functools.lru_cache", RuntimeWarning)
+
+    def lru_cache(user_function, maxsize=None):
+        @wraps(user_function)
+        def wrapper(*args, **kwargs):
+            arg_key = tuple(args, [item for item in kwargs.items()])
+            if arg_key not in cache:
+                cache[arg_key] = user_function(*args)
+            return cache[arg_key]
+        return wrapper

From 3af0a5c1941f9802334348a86393b0b4b235bccc Mon Sep 17 00:00:00 2001
From: James Lamb <jaylamb20@gmail.com>
Date: Mon, 26 Oct 2020 22:55:33 -0500
Subject: [PATCH 08/14] missing warnings

---
 tests/python_package_test/utils.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/python_package_test/utils.py b/tests/python_package_test/utils.py
index 428b630e41e3..cb8e02abc7ff 100644
--- a/tests/python_package_test/utils.py
+++ b/tests/python_package_test/utils.py
@@ -2,6 +2,7 @@
 try:
     from functools import lru_cache
 except ImportError:
+    import warnings
     warnings.warn("Could not import functools.lru_cache", RuntimeWarning)
 
     def lru_cache(user_function, maxsize=None):

From 3394222408a2ca004c3c4294aa7f9f844bf72c37 Mon Sep 17 00:00:00 2001
From: James Lamb <jaylamb20@gmail.com>
Date: Tue, 27 Oct 2020 23:09:13 -0500
Subject: [PATCH 09/14] fix imports

---
 tests/python_package_test/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/python_package_test/utils.py b/tests/python_package_test/utils.py
index cb8e02abc7ff..a2e0ba629e36 100644
--- a/tests/python_package_test/utils.py
+++ b/tests/python_package_test/utils.py
@@ -1,8 +1,8 @@
-
 try:
     from functools import lru_cache
 except ImportError:
     import warnings
+    from functools import wraps
     warnings.warn("Could not import functools.lru_cache", RuntimeWarning)
 
     def lru_cache(user_function, maxsize=None):

From 3a5fe6094083d4603586ebe579ba5d0342106917 Mon Sep 17 00:00:00 2001
From: James Lamb <jaylamb20@gmail.com>
Date: Wed, 28 Oct 2020 00:56:33 -0500
Subject: [PATCH 10/14] fix lru_cache backport

---
 tests/python_package_test/utils.py | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/tests/python_package_test/utils.py b/tests/python_package_test/utils.py
index a2e0ba629e36..57f97b8b3ce7 100644
--- a/tests/python_package_test/utils.py
+++ b/tests/python_package_test/utils.py
@@ -2,14 +2,16 @@
     from functools import lru_cache
 except ImportError:
     import warnings
-    from functools import wraps
     warnings.warn("Could not import functools.lru_cache", RuntimeWarning)
 
-    def lru_cache(user_function, maxsize=None):
-        @wraps(user_function)
-        def wrapper(*args, **kwargs):
-            arg_key = tuple(args, [item for item in kwargs.items()])
-            if arg_key not in cache:
-                cache[arg_key] = user_function(*args)
-            return cache[arg_key]
-        return wrapper
+    def lru_cache(maxsize=None):
+        cache = {}
+
+        def _lru_wrapper(user_function):
+            def wrapper(*args, **kwargs):
+                arg_key = (args, tuple([item for item in kwargs.items()]))
+                if arg_key not in cache:
+                    cache[arg_key] = user_function(*args)
+                return cache[arg_key]
+            return wrapper
+        return _lru_wrapper

From c1cb1b26492ef4c393dc84f6e902ee0863b6cf83 Mon Sep 17 00:00:00 2001
From: James Lamb <jaylamb20@gmail.com>
Date: Wed, 28 Oct 2020 10:10:19 -0500
Subject: [PATCH 11/14] forward kwargs to wrapped function in lru_cache fallback

---
 tests/python_package_test/utils.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/python_package_test/utils.py b/tests/python_package_test/utils.py
index 57f97b8b3ce7..76347c829257 100644
--- a/tests/python_package_test/utils.py
+++ b/tests/python_package_test/utils.py
@@ -11,7 +11,7 @@ def _lru_wrapper(user_function):
             def wrapper(*args, **kwargs):
                 arg_key = (args, tuple([item for item in kwargs.items()]))
                 if arg_key not in cache:
-                    cache[arg_key] = user_function(*args)
+                    cache[arg_key] = user_function(*args, **kwargs)
                 return cache[arg_key]
             return wrapper
         return _lru_wrapper

From 16ec8aa888f375355db9ce6ab2a07e0070ddd483 Mon Sep 17 00:00:00 2001
From: James Lamb <jaylamb20@gmail.com>
Date: Wed, 28 Oct 2020 21:39:29 +0000
Subject: [PATCH 12/14] Apply suggestions from code review

Co-authored-by: Nikita Titov <nekit94-08@mail.ru>
---
 tests/python_package_test/utils.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tests/python_package_test/utils.py b/tests/python_package_test/utils.py
index 76347c829257..6c3e4601a1dd 100644
--- a/tests/python_package_test/utils.py
+++ b/tests/python_package_test/utils.py
@@ -1,3 +1,4 @@
+# coding: utf-8
 try:
     from functools import lru_cache
 except ImportError:
@@ -9,7 +10,7 @@ def lru_cache(maxsize=None):
 
         def _lru_wrapper(user_function):
             def wrapper(*args, **kwargs):
-                arg_key = (args, tuple([item for item in kwargs.items()]))
+                arg_key = (args, tuple(kwargs.items()))
                 if arg_key not in cache:
                     cache[arg_key] = user_function(*args, **kwargs)
                 return cache[arg_key]

From 4c2e7befa3ee4fe4f518699a8d1f81db0060d109 Mon Sep 17 00:00:00 2001
From: James Lamb <jaylamb20@gmail.com>
Date: Wed, 28 Oct 2020 17:38:08 -0500
Subject: [PATCH 13/14] move cached dataset loaders into utils module

---
 tests/python_package_test/test_basic.py   |  15 ++--
 tests/python_package_test/test_engine.py  | 100 +++++++++-------------
 tests/python_package_test/test_sklearn.py |  89 +++++++------------
 tests/python_package_test/utils.py        |  27 ++++++
 4 files changed, 103 insertions(+), 128 deletions(-)

diff --git a/tests/python_package_test/test_basic.py b/tests/python_package_test/test_basic.py
index 19a4debbb914..f53933021b14 100644
--- a/tests/python_package_test/test_basic.py
+++ b/tests/python_package_test/test_basic.py
@@ -7,21 +7,16 @@
 import numpy as np
 
 from scipy import sparse
-from sklearn.datasets import load_breast_cancer, dump_svmlight_file, load_svmlight_file
+from sklearn.datasets import dump_svmlight_file, load_svmlight_file
 from sklearn.model_selection import train_test_split
 
-from .utils import lru_cache
-
-
-@lru_cache(maxsize=None)
-def _load_breast_cancer(**kwargs):
-    return load_breast_cancer(**kwargs)
+from .utils import load_breast_cancer, lru_cache
 
 
 class TestBasic(unittest.TestCase):
 
     def test(self):
-        X_train, X_test, y_train, y_test = train_test_split(*_load_breast_cancer(return_X_y=True),
+        X_train, X_test, y_train, y_test = train_test_split(*load_breast_cancer(return_X_y=True),
                                                             test_size=0.1, random_state=2)
         train_data = lgb.Dataset(X_train, label=y_train)
         valid_data = train_data.create_valid(X_test, label=y_test)
@@ -93,7 +88,7 @@ def test(self):
         os.remove(tname)
 
     def test_chunked_dataset(self):
-        X_train, X_test, y_train, y_test = train_test_split(*_load_breast_cancer(return_X_y=True), test_size=0.1, random_state=2)
+        X_train, X_test, y_train, y_test = train_test_split(*load_breast_cancer(return_X_y=True), test_size=0.1, random_state=2)
 
         chunk_size = X_train.shape[0] // 10 + 1
         X_train = [X_train[i * chunk_size:(i + 1) * chunk_size, :] for i in range(X_train.shape[0] // chunk_size + 1)]
@@ -316,7 +311,7 @@ def check_asserts(data):
             self.assertAlmostEqual(data.label[1], data.weight[1])
             self.assertListEqual(data.feature_name, data.get_feature_name())
 
-        X, y = _load_breast_cancer(return_X_y=True)
+        X, y = load_breast_cancer(return_X_y=True)
         sequence = np.ones(y.shape[0])
         sequence[0] = np.nan
         sequence[1] = np.inf
diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 67696174a711..3cb4c7ff55c3 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -10,8 +10,7 @@
 import lightgbm as lgb
 import numpy as np
 from scipy.sparse import csr_matrix, isspmatrix_csr, isspmatrix_csc
-from sklearn.datasets import (load_boston, load_breast_cancer, load_digits,
-                              load_iris, load_svmlight_file, make_multilabel_classification)
+from sklearn.datasets import load_svmlight_file, make_multilabel_classification
 from sklearn.metrics import log_loss, mean_absolute_error, mean_squared_error, roc_auc_score, average_precision_score
 from sklearn.model_selection import train_test_split, TimeSeriesSplit, GroupKFold
 
@@ -20,7 +19,7 @@
 except ImportError:
     import pickle
 
-from .utils import lru_cache
+from .utils import load_boston, load_breast_cancer, load_digits, load_iris, lru_cache
 
 
 decreasing_generator = itertools.count(0, -1)
@@ -53,29 +52,9 @@ def categorize(continuous_x):
     return np.digitize(continuous_x, bins=np.arange(0, 1, 0.01))
 
 
-@lru_cache(maxsize=None)
-def _load_boston(**kwargs):
-    return load_boston(**kwargs)
-
-
-@lru_cache(maxsize=None)
-def _load_breast_cancer(**kwargs):
-    return load_breast_cancer(**kwargs)
-
-
-@lru_cache(maxsize=None)
-def _load_digits(**kwargs):
-    return load_digits(**kwargs)
-
-
-@lru_cache(maxsize=None)
-def _load_iris(**kwargs):
-    return load_iris(**kwargs)
-
-
 class TestEngine(unittest.TestCase):
     def test_binary(self):
-        X, y = _load_breast_cancer(return_X_y=True)
+        X, y = load_breast_cancer(return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         params = {
             'objective': 'binary',
@@ -97,7 +76,7 @@ def test_binary(self):
         self.assertAlmostEqual(evals_result['valid_0']['binary_logloss'][-1], ret, places=5)
 
     def test_rf(self):
-        X, y = _load_breast_cancer(return_X_y=True)
+        X, y = load_breast_cancer(return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         params = {
             'boosting_type': 'rf',
@@ -122,7 +101,7 @@ def test_rf(self):
         self.assertAlmostEqual(evals_result['valid_0']['binary_logloss'][-1], ret, places=5)
 
     def test_regression(self):
-        X, y = _load_boston(return_X_y=True)
+        X, y = load_boston(return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         params = {
             'metric': 'l2',
@@ -399,7 +378,7 @@ def test_categorical_non_zero_inputs(self):
         self.assertAlmostEqual(evals_result['valid_0']['auc'][-1], ret, places=5)
 
     def test_multiclass(self):
-        X, y = _load_digits(n_class=10, return_X_y=True)
+        X, y = load_digits(n_class=10, return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         params = {
             'objective': 'multiclass',
@@ -420,7 +399,7 @@ def test_multiclass(self):
         self.assertAlmostEqual(evals_result['valid_0']['multi_logloss'][-1], ret, places=5)
 
     def test_multiclass_rf(self):
-        X, y = _load_digits(n_class=10, return_X_y=True)
+        X, y = load_digits(n_class=10, return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         params = {
             'boosting_type': 'rf',
@@ -448,7 +427,7 @@ def test_multiclass_rf(self):
         self.assertAlmostEqual(evals_result['valid_0']['multi_logloss'][-1], ret, places=5)
 
     def test_multiclass_prediction_early_stopping(self):
-        X, y = _load_digits(n_class=10, return_X_y=True)
+        X, y = load_digits(n_class=10, return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         params = {
             'objective': 'multiclass',
@@ -474,7 +453,7 @@ def test_multiclass_prediction_early_stopping(self):
         self.assertLess(ret, 0.2)
 
     def test_multi_class_error(self):
-        X, y = _load_digits(n_class=10, return_X_y=True)
+        X, y = load_digits(n_class=10, return_X_y=True)
         params = {'objective': 'multiclass', 'num_classes': 10, 'metric': 'multi_error',
                   'num_leaves': 4, 'verbose': -1}
         lgb_data = lgb.Dataset(X, label=y)
@@ -519,7 +498,7 @@ def test_multi_class_error(self):
 
     def test_auc_mu(self):
         # should give same result as binary auc for 2 classes
-        X, y = _load_digits(n_class=10, return_X_y=True)
+        X, y = load_digits(n_class=10, return_X_y=True)
         y_new = np.zeros((len(y)))
         y_new[y != 0] = 1
         lgb_X = lgb.Dataset(X, label=y_new)
@@ -597,7 +576,7 @@ def test_auc_mu(self):
         self.assertNotEqual(results_weight['training']['auc_mu'][-1], results_no_weight['training']['auc_mu'][-1])
 
     def test_early_stopping(self):
-        X, y = _load_breast_cancer(return_X_y=True)
+        X, y = load_breast_cancer(return_X_y=True)
         params = {
             'objective': 'binary',
             'metric': 'binary_logloss',
@@ -629,7 +608,7 @@ def test_early_stopping(self):
         self.assertIn('binary_logloss', gbm.best_score[valid_set_name])
 
     def test_continue_train(self):
-        X, y = _load_boston(return_X_y=True)
+        X, y = load_boston(return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         params = {
             'objective': 'regression',
@@ -657,7 +636,7 @@ def test_continue_train(self):
         os.remove(model_name)
 
     def test_continue_train_reused_dataset(self):
-        X, y = _load_boston(return_X_y=True)
+        X, y = load_boston(return_X_y=True)
         params = {
             'objective': 'regression',
             'verbose': -1
@@ -670,7 +649,7 @@ def test_continue_train_reused_dataset(self):
         self.assertEqual(gbm.current_iteration(), 20)
 
     def test_continue_train_dart(self):
-        X, y = _load_boston(return_X_y=True)
+        X, y = load_boston(return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         params = {
             'boosting_type': 'dart',
@@ -693,7 +672,7 @@ def test_continue_train_dart(self):
         self.assertAlmostEqual(evals_result['valid_0']['l1'][-1], ret, places=5)
 
     def test_continue_train_multiclass(self):
-        X, y = _load_iris(return_X_y=True)
+        X, y = load_iris(return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         params = {
             'objective': 'multiclass',
@@ -716,7 +695,7 @@ def test_continue_train_multiclass(self):
         self.assertAlmostEqual(evals_result['valid_0']['multi_logloss'][-1], ret, places=5)
 
     def test_cv(self):
-        X_train, y_train = _load_boston(return_X_y=True)
+        X_train, y_train = load_boston(return_X_y=True)
         params = {'verbose': -1}
         lgb_train = lgb.Dataset(X_train, y_train)
         # shuffle = False, override metric in params
@@ -775,7 +754,7 @@ def test_cv(self):
         np.testing.assert_allclose(cv_res_lambda['ndcg@3-mean'], cv_res_lambda_obj['ndcg@3-mean'])
 
     def test_cvbooster(self):
-        X, y = _load_breast_cancer(return_X_y=True)
+        X, y = load_breast_cancer(return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         params = {
             'objective': 'binary',
@@ -819,7 +798,7 @@ def test_cvbooster(self):
         self.assertLess(ret, 0.15)
 
     def test_feature_name(self):
-        X_train, y_train = _load_boston(return_X_y=True)
+        X_train, y_train = load_boston(return_X_y=True)
         params = {'verbose': -1}
         lgb_train = lgb.Dataset(X_train, y_train)
         feature_names = ['f_' + str(i) for i in range(X_train.shape[-1])]
@@ -847,7 +826,7 @@ def test_feature_name_with_non_ascii(self):
 
     def test_save_load_copy_pickle(self):
         def train_and_predict(init_model=None, return_model=False):
-            X, y = _load_boston(return_X_y=True)
+            X, y = load_boston(return_X_y=True)
             X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
             params = {
                 'objective': 'regression',
@@ -1011,7 +990,7 @@ def test_reference_chain(self):
         self.assertEqual(len(evals_result['valid_1']['rmse']), 20)
 
     def test_contribs(self):
-        X, y = _load_breast_cancer(return_X_y=True)
+        X, y = load_breast_cancer(return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         params = {
             'objective': 'binary',
@@ -1394,7 +1373,7 @@ def test_small_max_bin(self):
         np.random.seed()  # reset seed
 
     def test_refit(self):
-        X, y = _load_breast_cancer(return_X_y=True)
+        X, y = load_breast_cancer(return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         params = {
             'objective': 'binary',
@@ -1410,7 +1389,7 @@ def test_refit(self):
         self.assertGreater(err_pred, new_err_pred)
 
     def test_mape_rf(self):
-        X, y = _load_boston(return_X_y=True)
+        X, y = load_boston(return_X_y=True)
         params = {
             'boosting_type': 'rf',
             'objective': 'mape',
@@ -1427,7 +1406,7 @@ def test_mape_rf(self):
         self.assertGreater(pred_mean, 20)
 
     def test_mape_dart(self):
-        X, y = _load_boston(return_X_y=True)
+        X, y = load_boston(return_X_y=True)
         params = {
             'boosting_type': 'dart',
             'objective': 'mape',
@@ -1506,7 +1485,7 @@ def preprocess_data(dtrain, dtest, params):
             params['num_class'] = 4
             return dtrain, dtest, params
 
-        X, y = _load_iris(return_X_y=True)
+        X, y = load_iris(return_X_y=True)
         dataset = lgb.Dataset(X, y, free_raw_data=False)
         params = {'objective': 'multiclass', 'num_class': 3, 'verbose': -1}
         results = lgb.cv(params, dataset, num_boost_round=10, fpreproc=preprocess_data)
@@ -1514,7 +1493,7 @@ def preprocess_data(dtrain, dtest, params):
         self.assertEqual(len(results['multi_logloss-mean']), 10)
 
     def test_metrics(self):
-        X, y = _load_digits(n_class=2, return_X_y=True)
+        X, y = load_digits(n_class=2, return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         lgb_train = lgb.Dataset(X_train, y_train, silent=True)
         lgb_valid = lgb.Dataset(X_test, y_test, reference=lgb_train, silent=True)
@@ -1820,7 +1799,7 @@ def train_booster(params=params_obj_verbose, **kwargs):
         self.assertEqual(len(evals_result), 1)
         self.assertIn('error', evals_result['valid_0'])
 
-        X, y = _load_digits(n_class=3, return_X_y=True)
+        X, y = load_digits(n_class=3, return_X_y=True)
         lgb_train = lgb.Dataset(X, y, silent=True)
 
         obj_multi_aliases = ['multiclass', 'softmax', 'multiclassova', 'multiclass_ova', 'ova', 'ovr']
@@ -1888,7 +1867,7 @@ def train_booster(params=params_obj_verbose, **kwargs):
                           params_class_3_verbose, metrics='binary_error', fobj=dummy_obj)
 
     def test_multiple_feval_train(self):
-        X, y = _load_breast_cancer(return_X_y=True)
+        X, y = load_breast_cancer(return_X_y=True)
 
         params = {'verbose': -1, 'objective': 'binary', 'metric': 'binary_logloss'}
 
@@ -1911,7 +1890,7 @@ def test_multiple_feval_train(self):
         self.assertIn('decreasing_metric', evals_result['valid_0'])
 
     def test_multiple_feval_cv(self):
-        X, y = _load_breast_cancer(return_X_y=True)
+        X, y = load_breast_cancer(return_X_y=True)
 
         params = {'verbose': -1, 'objective': 'binary', 'metric': 'binary_logloss'}
 
@@ -1934,7 +1913,7 @@ def test_multiple_feval_cv(self):
 
     @unittest.skipIf(psutil.virtual_memory().available / 1024 / 1024 / 1024 < 3, 'not enough RAM')
     def test_model_size(self):
-        X, y = _load_boston(return_X_y=True)
+        X, y = load_boston(return_X_y=True)
         data = lgb.Dataset(X, y)
         bst = lgb.train({'verbose': -1}, data, num_boost_round=2)
         y_pred = bst.predict(X)
@@ -1960,7 +1939,7 @@ def test_model_size(self):
             self.skipTest('not enough RAM')
 
     def test_get_split_value_histogram(self):
-        X, y = _load_boston(return_X_y=True)
+        X, y = load_boston(return_X_y=True)
         lgb_train = lgb.Dataset(X, y, categorical_feature=[2])
         gbm = lgb.train({'verbose': -1}, lgb_train, num_boost_round=20)
         # test XGBoost-style return value
@@ -2070,7 +2049,7 @@ def metrics_combination_cv_regression(metric_list, assumed_iteration,
                          eval_train_metric=eval_train_metric)
             self.assertEqual(assumed_iteration, len(ret[list(ret.keys())[0]]))
 
-        X, y = _load_boston(return_X_y=True)
+        X, y = load_boston(return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
         X_test1, X_test2, y_test1, y_test2 = train_test_split(X_test, y_test, test_size=0.5, random_state=73)
         lgb_train = lgb.Dataset(X_train, y_train)
@@ -2148,7 +2127,7 @@ def metrics_combination_cv_regression(metric_list, assumed_iteration,
                                                                            decreasing_metric(preds, train_data)])
 
     def test_node_level_subcol(self):
-        X, y = _load_breast_cancer(return_X_y=True)
+        X, y = load_breast_cancer(return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         params = {
             'objective': 'binary',
@@ -2329,7 +2308,7 @@ def test_dataset_params_with_reference(self):
 
     def test_extra_trees(self):
         # check extra trees increases regularization
-        X, y = _load_boston(return_X_y=True)
+        X, y = load_boston(return_X_y=True)
         lgb_x = lgb.Dataset(X, label=y)
         params = {'objective': 'regression',
                   'num_leaves': 32,
@@ -2347,7 +2326,7 @@ def test_extra_trees(self):
 
     def test_path_smoothing(self):
         # check path smoothing increases regularization
-        X, y = _load_boston(return_X_y=True)
+        X, y = load_boston(return_X_y=True)
         lgb_x = lgb.Dataset(X, label=y)
         params = {'objective': 'regression',
                   'num_leaves': 32,
@@ -2369,7 +2348,7 @@ def _imptcs_to_numpy(X, impcts_dict):
             cols = ['Column_' + str(i) for i in range(X.shape[1])]
             return [impcts_dict.get(col, 0.) for col in cols]
 
-        X, y = _load_breast_cancer(return_X_y=True)
+        X, y = load_breast_cancer(return_X_y=True)
         data = lgb.Dataset(X, label=y)
         num_trees = 10
         bst = lgb.train({"objective": "binary", "verbose": -1}, data, num_trees)
@@ -2414,7 +2393,7 @@ def _imptcs_to_numpy(X, impcts_dict):
             self.assertIsNone(tree_df.loc[0, col])
 
     def test_interaction_constraints(self):
-        X, y = _load_boston(return_X_y=True)
+        X, y = load_boston(return_X_y=True)
         num_features = X.shape[1]
         train_data = lgb.Dataset(X, label=y)
         # check that constraint containing all features is equivalent to no constraint
@@ -2491,7 +2470,7 @@ def inner_test(X, y, params, early_stopping_rounds):
             np.testing.assert_allclose(pred4, pred6)
 
         # test for regression
-        X, y = _load_boston(return_X_y=True)
+        X, y = load_boston(return_X_y=True)
         params = {
             'objective': 'regression',
             'verbose': -1,
@@ -2504,7 +2483,7 @@ def inner_test(X, y, params, early_stopping_rounds):
         inner_test(X, y, params, early_stopping_rounds=None)
 
         # test for multi-class
-        X, y = _load_iris(return_X_y=True)
+        X, y = load_iris(return_X_y=True)
         params = {
             'objective': 'multiclass',
             'metric': 'multi_logloss',
@@ -2518,7 +2497,7 @@ def inner_test(X, y, params, early_stopping_rounds):
         inner_test(X, y, params, early_stopping_rounds=None)
 
         # test for binary
-        X, y = _load_breast_cancer(return_X_y=True)
+        X, y = load_breast_cancer(return_X_y=True)
         params = {
             'objective': 'binary',
             'metric': 'binary_logloss',
@@ -2546,6 +2525,7 @@ def test_average_precision_metric(self):
         sklearn_ap = average_precision_score(y, pred)
         self.assertAlmostEqual(ap, sklearn_ap)
         # test that average precision is 1 where model predicts perfectly
+        y = y.copy()
         y[:] = 1
         lgb_X = lgb.Dataset(X, label=y)
         lgb.train(params, lgb_X, num_boost_round=1, valid_sets=[lgb_X], evals_result=res)
diff --git a/tests/python_package_test/test_sklearn.py b/tests/python_package_test/test_sklearn.py
index 678d135099fe..103518de0171 100644
--- a/tests/python_package_test/test_sklearn.py
+++ b/tests/python_package_test/test_sklearn.py
@@ -10,9 +10,7 @@
 import numpy as np
 from sklearn import __version__ as sk_version
 from sklearn.base import clone
-from sklearn.datasets import (load_boston, load_breast_cancer, load_digits,
-                              load_iris, load_linnerud, load_svmlight_file,
-                              make_multilabel_classification)
+from sklearn.datasets import load_svmlight_file, make_multilabel_classification
 from sklearn.exceptions import SkipTestWarning
 from sklearn.metrics import log_loss, mean_squared_error
 from sklearn.model_selection import GridSearchCV, RandomizedSearchCV, train_test_split
@@ -22,7 +20,7 @@
                                             check_parameters_default_constructible)
 from sklearn.utils.validation import check_is_fitted
 
-from .utils import lru_cache
+from .utils import load_boston, load_breast_cancer, load_digits, load_iris, load_linnerud, lru_cache
 
 
 decreasing_generator = itertools.count(0, -1)
@@ -76,35 +74,10 @@ def multi_logloss(y_true, y_pred):
     return np.mean([-math.log(y_pred[i][y]) for i, y in enumerate(y_true)])
 
 
-@lru_cache(maxsize=None)
-def _load_boston(**kwargs):
-    return load_boston(**kwargs)
-
-
-@lru_cache(maxsize=None)
-def _load_breast_cancer(**kwargs):
-    return load_breast_cancer(**kwargs)
-
-
-@lru_cache(maxsize=None)
-def _load_digits(**kwargs):
-    return load_digits(**kwargs)
-
-
-@lru_cache(maxsize=None)
-def _load_iris(**kwargs):
-    return load_iris(**kwargs)
-
-
-@lru_cache(maxsize=None)
-def _load_linnerud(**kwargs):
-    return load_linnerud(**kwargs)
-
-
 class TestSklearn(unittest.TestCase):
 
     def test_binary(self):
-        X, y = _load_breast_cancer(return_X_y=True)
+        X, y = load_breast_cancer(return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         gbm = lgb.LGBMClassifier(n_estimators=50, silent=True)
         gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False)
@@ -113,7 +86,7 @@ def test_binary(self):
         self.assertAlmostEqual(ret, gbm.evals_result_['valid_0']['binary_logloss'][gbm.best_iteration_ - 1], places=5)
 
     def test_regression(self):
-        X, y = _load_boston(return_X_y=True)
+        X, y = load_boston(return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         gbm = lgb.LGBMRegressor(n_estimators=50, silent=True)
         gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False)
@@ -122,7 +95,7 @@ def test_regression(self):
         self.assertAlmostEqual(ret, gbm.evals_result_['valid_0']['l2'][gbm.best_iteration_ - 1], places=5)
 
     def test_multiclass(self):
-        X, y = _load_digits(n_class=10, return_X_y=True)
+        X, y = load_digits(n_class=10, return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         gbm = lgb.LGBMClassifier(n_estimators=50, silent=True)
         gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False)
@@ -165,7 +138,7 @@ def test_xendcg(self):
         self.assertGreater(gbm.best_score_['valid_0']['ndcg@3'], 0.6253)
 
     def test_regression_with_custom_objective(self):
-        X, y = _load_boston(return_X_y=True)
+        X, y = load_boston(return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         gbm = lgb.LGBMRegressor(n_estimators=50, silent=True, objective=objective_ls)
         gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False)
@@ -174,7 +147,7 @@ def test_regression_with_custom_objective(self):
         self.assertAlmostEqual(ret, gbm.evals_result_['valid_0']['l2'][gbm.best_iteration_ - 1], places=5)
 
     def test_binary_classification_with_custom_objective(self):
-        X, y = _load_digits(n_class=2, return_X_y=True)
+        X, y = load_digits(n_class=2, return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         gbm = lgb.LGBMClassifier(n_estimators=50, silent=True, objective=logregobj)
         gbm.fit(X_train, y_train, eval_set=[(X_test, y_test)], early_stopping_rounds=5, verbose=False)
@@ -186,7 +159,7 @@ def test_binary_classification_with_custom_objective(self):
         self.assertLess(ret, 0.05)
 
     def test_dart(self):
-        X, y = _load_boston(return_X_y=True)
+        X, y = load_boston(return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         gbm = lgb.LGBMRegressor(boosting_type='dart', n_estimators=50)
         gbm.fit(X_train, y_train)
@@ -199,7 +172,7 @@ def test_dart(self):
     def test_stacking_classifier(self):
         from sklearn.ensemble import StackingClassifier
 
-        X, y = _load_iris(return_X_y=True)
+        X, y = load_iris(return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
         classifiers = [('gbm1', lgb.LGBMClassifier(n_estimators=3)),
                        ('gbm2', lgb.LGBMClassifier(n_estimators=3))]
@@ -226,7 +199,7 @@ def test_stacking_classifier(self):
     def test_stacking_regressor(self):
         from sklearn.ensemble import StackingRegressor
 
-        X, y = _load_boston(return_X_y=True)
+        X, y = load_boston(return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)
         regressors = [('gbm1', lgb.LGBMRegressor(n_estimators=3)),
                       ('gbm2', lgb.LGBMRegressor(n_estimators=3))]
@@ -245,7 +218,7 @@ def test_stacking_regressor(self):
         self.assertEqual(len(reg.final_estimator_.feature_importances_), 15)
 
     def test_grid_search(self):
-        X, y = _load_iris(return_X_y=True)
+        X, y = load_iris(return_X_y=True)
         y = y.astype(str)  # utilize label encoder at it's max power
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1,
                                                             random_state=42)
@@ -275,7 +248,7 @@ def test_grid_search(self):
         self.assertLessEqual(score, 1.)
 
     def test_random_search(self):
-        X, y = _load_iris(return_X_y=True)
+        X, y = load_iris(return_X_y=True)
         y = y.astype(str)  # utilize label encoder at it's max power
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1,
                                                             random_state=42)
@@ -329,7 +302,7 @@ def test_multioutput_classifier(self):
     # sklearn < 0.23 does not have as_frame parameter
     @unittest.skipIf(sk_version < '0.23.0', 'scikit-learn version is less than 0.23')
     def test_multioutput_regressor(self):
-        bunch = _load_linnerud(as_frame=True)  # returns a Bunch instance
+        bunch = load_linnerud(as_frame=True)  # returns a Bunch instance
         X, y = bunch['data'], bunch['target']
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1,
                                                             random_state=42)
@@ -368,7 +341,7 @@ def test_classifier_chain(self):
     # sklearn < 0.23 does not have as_frame parameter
     @unittest.skipIf(sk_version < '0.23.0', 'scikit-learn version is less than 0.23')
     def test_regressor_chain(self):
-        bunch = _load_linnerud(as_frame=True)  # returns a Bunch instance
+        bunch = load_linnerud(as_frame=True)  # returns a Bunch instance
         X, y = bunch['data'], bunch['target']
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         order = [2, 0, 1]
@@ -385,7 +358,7 @@ def test_regressor_chain(self):
             self.assertIsInstance(regressor.booster_, lgb.Booster)
 
     def test_clone_and_property(self):
-        X, y = _load_boston(return_X_y=True)
+        X, y = load_boston(return_X_y=True)
         gbm = lgb.LGBMRegressor(n_estimators=10, silent=True)
         gbm.fit(X, y, verbose=False)
 
@@ -393,7 +366,7 @@ def test_clone_and_property(self):
         self.assertIsInstance(gbm.booster_, lgb.Booster)
         self.assertIsInstance(gbm.feature_importances_, np.ndarray)
 
-        X, y = _load_digits(n_class=2, return_X_y=True)
+        X, y = load_digits(n_class=2, return_X_y=True)
         clf = lgb.LGBMClassifier(n_estimators=10, silent=True)
         clf.fit(X, y, verbose=False)
         self.assertListEqual(sorted(clf.classes_), [0, 1])
@@ -402,7 +375,7 @@ def test_clone_and_property(self):
         self.assertIsInstance(clf.feature_importances_, np.ndarray)
 
     def test_joblib(self):
-        X, y = _load_boston(return_X_y=True)
+        X, y = load_boston(return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         gbm = lgb.LGBMRegressor(n_estimators=10, objective=custom_asymmetric_obj,
                                 silent=True, importance_type='split')
@@ -427,7 +400,7 @@ def test_joblib(self):
         np.testing.assert_allclose(pred_origin, pred_pickle)
 
     def test_random_state_object(self):
-        X, y = _load_iris(return_X_y=True)
+        X, y = load_iris(return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         state1 = np.random.RandomState(123)
         state2 = np.random.RandomState(123)
@@ -460,14 +433,14 @@ def test_random_state_object(self):
                           df1, df3)
 
     def test_feature_importances_single_leaf(self):
-        data = _load_iris(return_X_y=False)
+        data = load_iris(return_X_y=False)
         clf = lgb.LGBMClassifier(n_estimators=10)
         clf.fit(data.data, data.target)
         importances = clf.feature_importances_
         self.assertEqual(len(importances), 4)
 
     def test_feature_importances_type(self):
-        data = _load_iris(return_X_y=False)
+        data = load_iris(return_X_y=False)
         clf = lgb.LGBMClassifier(n_estimators=10)
         clf.fit(data.data, data.target)
         clf.set_params(importance_type='split')
@@ -591,7 +564,7 @@ def test_pandas_sparse(self):
 
     def test_predict(self):
         # With default params
-        iris = _load_iris(return_X_y=False)
+        iris = load_iris(return_X_y=False)
         X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target,
                                                             test_size=0.2, random_state=42)
 
@@ -671,7 +644,7 @@ def test_predict(self):
                           res_engine, res_sklearn_params)
 
     def test_evaluate_train_set(self):
-        X, y = _load_boston(return_X_y=True)
+        X, y = load_boston(return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         gbm = lgb.LGBMRegressor(n_estimators=10, silent=True)
         gbm.fit(X_train, y_train, eval_set=[(X_train, y_train), (X_test, y_test)], verbose=False)
@@ -684,7 +657,7 @@ def test_evaluate_train_set(self):
         self.assertIn('l2', gbm.evals_result_['valid_1'])
 
     def test_metrics(self):
-        X, y = _load_boston(return_X_y=True)
+        X, y = load_boston(return_X_y=True)
         params = {'n_estimators': 2, 'verbose': -1}
         params_fit = {'X': X, 'y': y, 'eval_set': (X, y), 'verbose': False}
 
@@ -724,7 +697,7 @@ def test_metrics(self):
         self.assertIn('mape', gbm.evals_result_['training'])
 
         # non-default metric with multiple metrics in eval_metric for LGBMClassifier
-        X_classification, y_classification = _load_breast_cancer(return_X_y=True)
+        X_classification, y_classification = load_breast_cancer(return_X_y=True)
         params_classification = {'n_estimators': 2, 'verbose': -1,
                                  'objective': 'binary', 'metric': 'binary_logloss'}
         params_fit_classification = {'X': X_classification, 'y': y_classification,
@@ -907,7 +880,7 @@ def test_metrics(self):
         self.assertIn('mape', gbm.evals_result_['training'])
         self.assertIn('error', gbm.evals_result_['training'])
 
-        X, y = _load_digits(n_class=3, return_X_y=True)
+        X, y = load_digits(n_class=3, return_X_y=True)
         params_fit = {'X': X, 'y': y, 'eval_set': (X, y), 'verbose': False}
 
         # default metric and invalid binary metric is replaced with multiclass alternative
@@ -934,7 +907,7 @@ def test_metrics(self):
         self.assertIn('multi_logloss', gbm.evals_result_['training'])
         self.assertIn('multi_error', gbm.evals_result_['training'])
 
-        X, y = _load_digits(n_class=2, return_X_y=True)
+        X, y = load_digits(n_class=2, return_X_y=True)
         params_fit = {'X': X, 'y': y, 'eval_set': (X, y), 'verbose': False}
 
         # default metric and invalid multiclass metric is replaced with binary alternative
@@ -951,7 +924,7 @@ def test_metrics(self):
 
     def test_multiple_eval_metrics(self):
 
-        X, y = _load_breast_cancer(return_X_y=True)
+        X, y = load_breast_cancer(return_X_y=True)
 
         params = {'n_estimators': 2, 'verbose': -1, 'objective': 'binary', 'metric': 'binary_logloss'}
         params_fit = {'X': X, 'y': y, 'eval_set': (X, y), 'verbose': False}
@@ -1030,7 +1003,7 @@ def fit_and_check(eval_set_names, metric_names, assumed_iteration, first_metric_
                     self.assertEqual(assumed_iteration if eval_set_name != 'training' else gbm.n_estimators,
                                      gbm.best_iteration_)
 
-        X, y = _load_boston(return_X_y=True)
+        X, y = load_boston(return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
         X_test1, X_test2, y_test1, y_test2 = train_test_split(X_test, y_test, test_size=0.5, random_state=72)
         params = {'n_estimators': 30,
@@ -1111,7 +1084,7 @@ def fit_and_check(eval_set_names, metric_names, assumed_iteration, first_metric_
         fit_and_check(['valid_0', 'valid_1'], ['l1', 'l2'], iter_min_l2, True)
 
     def test_class_weight(self):
-        X, y = _load_digits(n_class=10, return_X_y=True)
+        X, y = load_digits(n_class=10, return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
         y_train_str = y_train.astype('str')
         y_test_str = y_test.astype('str')
@@ -1145,7 +1118,7 @@ def test_class_weight(self):
                                            gbm_str.evals_result_[eval_set][metric])
 
     def test_continue_training_with_model(self):
-        X, y = _load_digits(n_class=3, return_X_y=True)
+        X, y = load_digits(n_class=3, return_X_y=True)
         X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)
         init_gbm = lgb.LGBMClassifier(n_estimators=5).fit(X_train, y_train, eval_set=(X_test, y_test),
                                                           verbose=False)
@@ -1160,7 +1133,7 @@ def test_continue_training_with_model(self):
     # sklearn < 0.22 requires passing "attributes" argument
     @unittest.skipIf(sk_version < '0.22.0', 'scikit-learn version is less than 0.22')
     def test_check_is_fitted(self):
-        X, y = _load_digits(n_class=2, return_X_y=True)
+        X, y = load_digits(n_class=2, return_X_y=True)
         est = lgb.LGBMModel(n_estimators=5, objective="binary")
         clf = lgb.LGBMClassifier(n_estimators=5)
         reg = lgb.LGBMRegressor(n_estimators=5)
diff --git a/tests/python_package_test/utils.py b/tests/python_package_test/utils.py
index 6c3e4601a1dd..f0b160d60dfb 100644
--- a/tests/python_package_test/utils.py
+++ b/tests/python_package_test/utils.py
@@ -1,4 +1,6 @@
 # coding: utf-8
+import sklearn.datasets
+
 try:
     from functools import lru_cache
 except ImportError:
@@ -16,3 +18,28 @@ def wrapper(*args, **kwargs):
                 return cache[arg_key]
             return wrapper
         return _lru_wrapper
+
+
+@lru_cache(maxsize=None)  # memoized on kwargs; all callers share the returned object, so copy before mutating
+def load_boston(**kwargs):
+    return sklearn.datasets.load_boston(**kwargs)
+
+
+@lru_cache(maxsize=None)  # memoized on kwargs; all callers share the returned object, so copy before mutating
+def load_breast_cancer(**kwargs):
+    return sklearn.datasets.load_breast_cancer(**kwargs)
+
+
+@lru_cache(maxsize=None)  # memoized on kwargs (one entry per n_class/return_X_y combo); shared result, copy before mutating
+def load_digits(**kwargs):
+    return sklearn.datasets.load_digits(**kwargs)
+
+
+@lru_cache(maxsize=None)  # memoized on kwargs; all callers share the returned object, so copy before mutating
+def load_iris(**kwargs):
+    return sklearn.datasets.load_iris(**kwargs)
+
+
+@lru_cache(maxsize=None)  # memoized on kwargs; all callers share the returned object, so copy before mutating
+def load_linnerud(**kwargs):
+    return sklearn.datasets.load_linnerud(**kwargs)

From dfb0fd35cdea555d8b8aac800fbb01528fbe0ca4 Mon Sep 17 00:00:00 2001
From: James Lamb <jaylamb20@gmail.com>
Date: Thu, 29 Oct 2020 00:03:50 -0500
Subject: [PATCH 14/14] cache in test_plotting

---
 tests/python_package_test/test_basic.py    | 2 +-
 tests/python_package_test/test_engine.py   | 2 +-
 tests/python_package_test/test_plotting.py | 3 ++-
 tests/python_package_test/test_sklearn.py  | 2 +-
 4 files changed, 5 insertions(+), 4 deletions(-)

diff --git a/tests/python_package_test/test_basic.py b/tests/python_package_test/test_basic.py
index f53933021b14..a0ce5b8f8b66 100644
--- a/tests/python_package_test/test_basic.py
+++ b/tests/python_package_test/test_basic.py
@@ -10,7 +10,7 @@
 from sklearn.datasets import dump_svmlight_file, load_svmlight_file
 from sklearn.model_selection import train_test_split
 
-from .utils import load_breast_cancer, lru_cache
+from .utils import load_breast_cancer
 
 
 class TestBasic(unittest.TestCase):
diff --git a/tests/python_package_test/test_engine.py b/tests/python_package_test/test_engine.py
index 3cb4c7ff55c3..de8689fd3ea5 100644
--- a/tests/python_package_test/test_engine.py
+++ b/tests/python_package_test/test_engine.py
@@ -19,7 +19,7 @@
 except ImportError:
     import pickle
 
-from .utils import load_boston, load_breast_cancer, load_digits, load_iris, lru_cache
+from .utils import load_boston, load_breast_cancer, load_digits, load_iris
 
 
 decreasing_generator = itertools.count(0, -1)
diff --git a/tests/python_package_test/test_plotting.py b/tests/python_package_test/test_plotting.py
index 786b79760910..293012348ac3 100644
--- a/tests/python_package_test/test_plotting.py
+++ b/tests/python_package_test/test_plotting.py
@@ -3,7 +3,6 @@
 
 import lightgbm as lgb
 from lightgbm.compat import MATPLOTLIB_INSTALLED, GRAPHVIZ_INSTALLED
-from sklearn.datasets import load_breast_cancer
 from sklearn.model_selection import train_test_split
 
 if MATPLOTLIB_INSTALLED:
@@ -12,6 +11,8 @@
 if GRAPHVIZ_INSTALLED:
     import graphviz
 
+from .utils import load_breast_cancer
+
 
 class TestBasic(unittest.TestCase):
 
diff --git a/tests/python_package_test/test_sklearn.py b/tests/python_package_test/test_sklearn.py
index 103518de0171..623f83a517a5 100644
--- a/tests/python_package_test/test_sklearn.py
+++ b/tests/python_package_test/test_sklearn.py
@@ -20,7 +20,7 @@
                                             check_parameters_default_constructible)
 from sklearn.utils.validation import check_is_fitted
 
-from .utils import load_boston, load_breast_cancer, load_digits, load_iris, load_linnerud, lru_cache
+from .utils import load_boston, load_breast_cancer, load_digits, load_iris, load_linnerud
 
 
 decreasing_generator = itertools.count(0, -1)