Bugfixes in examples and tree params (#57)

bkleyn · web-flow · commit 646a3987f3a8 · 2022-03-28T12:15:06.000-04:00
diff --git a/CHANGELOG.txt b/CHANGELOG.txt
@@ -2,6 +2,12 @@
 MABWiser CHANGELOG
 =====================
 
+March, 28, 2022 2.4.1
+-------------------------------------------------------------------------------
+minor:
+- Bug fixes in examples (context_free_mab, customized_mab, parallel_mab)
+- Validate tree parameters of TreeBandit against sklearn.tree.DecisionTreeRegressor (previously DecisionTreeClassifier)
+
 March, 17, 2022 2.4.0
 -------------------------------------------------------------------------------
 major:
diff --git a/examples/context_free_mab.py b/examples/context_free_mab.py
@@ -83,7 +83,7 @@
 
 # Results
 print("Randomized Popularity: ", prediction, " ", expectations)
-assert(prediction == 2)
+assert(prediction == 1)
 
 ###################################
 # Softmax Learning Policy
diff --git a/examples/customized_mab.py b/examples/customized_mab.py
@@ -120,7 +120,7 @@ def predict(self, contexts: np.ndarray=None):
 class LinUCBColdStart(_Linear):
     def __init__(self, rng, arms, n_jobs, backend, l2_lambda=1.0, alpha=1.0, features=None):
         # initialize the parent class as is
-        super().__init__(rng, arms, n_jobs, backend, l2_lambda, alpha, 'ucb')
+        super().__init__(rng, arms, n_jobs, backend, alpha, 0.0, l2_lambda, 'ucb', False)
 
         # save the feature vectors
         self.features = features
diff --git a/examples/parallel_mab.py b/examples/parallel_mab.py
@@ -39,24 +39,13 @@
 rewards_train, rewards_test = train_test_split(rewards, test_size=0.3, random_state=seed)
 decisions_train, decisions_test = train_test_split(decisions, test_size=0.3, random_state=seed)
 
-# Fit standard scaler for each arm
-arm_to_scaler = {}
-for arm in arms:
-    # Get indices for arm
-    indices = np.where(decisions_train == arm)
-
-    # Fit standard scaler
-    scaler = StandardScaler()
-    scaler.fit(contexts[indices])
-    arm_to_scaler[arm] = scaler
-
 ########################################################
 # LinUCB Learning Policy
 ########################################################
 
 # LinUCB learning policy with alpha 1.25 and n_jobs = -1 (maximum available cores)
 linucb = MAB(arms=arms,
-             learning_policy=LearningPolicy.LinUCB(alpha=1.25, arm_to_scaler=arm_to_scaler),
+             learning_policy=LearningPolicy.LinUCB(alpha=1.25, scale=True),
              n_jobs=-1)
 
 # Learn from playlists shown and observed click rewards for each arm
diff --git a/mabwiser/_version.py b/mabwiser/_version.py
@@ -3,5 +3,5 @@
 
 __author__ = "FMR LLC"
 __email__ = "opensource@fmr.com"
-__version__ = "2.4.0"
+__version__ = "2.4.1"
 __copyright__ = "Copyright (C), FMR LLC"
diff --git a/mabwiser/mab.py b/mabwiser/mab.py
@@ -15,7 +15,7 @@
 import numpy as np
 import pandas as pd
 from sklearn.cluster import MiniBatchKMeans
-from sklearn.tree import DecisionTreeClassifier
+from sklearn.tree import DecisionTreeRegressor
 
 from mabwiser._version import __author__, __email__, __version__, __copyright__
 from mabwiser.approximate import _LSHNearest
@@ -633,9 +633,9 @@ class TreeBandit(NamedTuple):
         ----------
         tree_parameters: Dict, **kwarg
             Parameters of the decision tree.
-            The keys must match the parameters of sklearn.tree.DecisionTreeClassifier.
+            The keys must match the parameters of sklearn.tree.DecisionTreeRegressor.
             When a parameter is not given, the default parameters from
-            sklearn.tree.DecisionTreeClassifier will be chosen.
+            sklearn.tree.DecisionTreeRegressor will be chosen.
             Default value is an empty dictionary.
 
         Example
@@ -655,10 +655,10 @@ class TreeBandit(NamedTuple):
 
         def _validate(self):
             check_true(isinstance(self.tree_parameters, dict), TypeError("tree_parameters must be a dictionary."))
-            tree = DecisionTreeClassifier()
+            tree = DecisionTreeRegressor()
             for key in self.tree_parameters.keys():
                 check_true(key in tree.__dict__.keys(),
-                           ValueError("sklearn.tree.DecisionTreeClassifier doesn't have a parameter " + str(key) + "."))
+                           ValueError("sklearn.tree.DecisionTreeRegressor doesn't have a parameter " + str(key) + "."))
 
         def _is_compatible(self, learning_policy: LearningPolicy):
             # TreeBandit is compatible with these learning policies
diff --git a/tests/test_base.py b/tests/test_base.py
@@ -118,11 +118,11 @@ def predict(arms: List[Arm],
             return expectations[0] if num_run == 1 else expectations, mab
 
     @staticmethod
-    def is_compatible(lp, np):
+    def is_compatible(learning_policy, neighborhood_policy):
 
         # Special case for TreeBandit lp/np compatibility
-        if isinstance(np, NeighborhoodPolicy.TreeBandit):
-            return np._is_compatible(lp)
+        if isinstance(neighborhood_policy, NeighborhoodPolicy.TreeBandit):
+            return neighborhood_policy._is_compatible(learning_policy)
 
         return True