use pandas DataFrame in LSBoostRegressor, LSBoostClassifier and AdaOpt

Techtonique · Apr 26, 2024 · ff57cd4 · ff57cd4
1 parent 75f43ec
commit ff57cd4
Show file tree

Hide file tree

Showing 15 changed files with 68 additions and 140 deletions.
diff --git a/CHANGES.md b/CHANGES.md
@@ -1,3 +1,7 @@
+# version 0.15.0
+
+- `LSBoostRegressor`, `LSBoostClassifier` and `AdaOpt` now accept a pandas DataFrame as input `X` (it is converted to a NumPy array internally)
+
 # version 0.14.0
 
 - add polynomial (interactions) features to `LSBoostRegressor` and `LSBoostClassifier`

diff --git a/mlsauce.egg-info/PKG-INFO b/mlsauce.egg-info/PKG-INFO
diff --git a/mlsauce.egg-info/SOURCES.txt b/mlsauce.egg-info/SOURCES.txt
diff --git a/mlsauce.egg-info/dependency_links.txt b/mlsauce.egg-info/dependency_links.txt
diff --git a/mlsauce.egg-info/not-zip-safe b/mlsauce.egg-info/not-zip-safe
diff --git a/mlsauce.egg-info/requires.txt b/mlsauce.egg-info/requires.txt
diff --git a/mlsauce.egg-info/top_level.txt b/mlsauce.egg-info/top_level.txt
diff --git a/mlsauce/adaopt/__init__.py b/mlsauce/adaopt/__init__.py
@@ -1,6 +1,3 @@
-try:
-    from ._adaopt import AdaOpt
-except ImportError:
-    pass
+from ._adaopt import AdaOpt
 
 __all__ = ["AdaOpt"]
diff --git a/mlsauce/adaopt/_adaopt.py b/mlsauce/adaopt/_adaopt.py
@@ -1,5 +1,6 @@
-import numpy as np
 import pickle
+import numpy as np
+import pandas as pd
 from joblib import Parallel, delayed
 from joblib import wrap_non_picklable_objects
 from sklearn.base import BaseEstimator
@@ -17,6 +18,7 @@
     pyximport.install()
     import _adaoptc as adaoptc
 
+
 class AdaOpt(BaseEstimator, ClassifierMixin):
     """AdaOpt classifier.
 
@@ -170,6 +172,9 @@ def fit(self, X, y, **kwargs):
 
         """
 
+        if isinstance(X, pd.DataFrame):
+            X = X.values
+
         if self.n_clusters_input > 0:
             clustered_X, self.scaler_, self.label_encoder_, self.clusterer_ = (
                 cluster(
@@ -259,6 +264,9 @@ def predict_proba(self, X, **kwargs):
 
         """
 
+        if isinstance(X, pd.DataFrame):
+            X = X.values
+
         n_train, p_train = self.scaled_X_train.shape
 
         if self.n_clusters_input > 0:

diff --git a/mlsauce/booster/_booster_classifier.py b/mlsauce/booster/_booster_classifier.py
@@ -2,9 +2,11 @@
     from . import _boosterc as boosterc
 except ImportError:
     import pyximport
+
     pyximport.install()
     import _boosterc as boosterc
 import numpy as np
+import pandas as pd
 import platform
 import warnings
 from sklearn.preprocessing import PolynomialFeatures
@@ -71,7 +73,7 @@ class LSBoostClassifier(BaseEstimator, ClassifierMixin):
 
         cluster_scaling: str
             scaling method for clustering: currently 'standard', 'robust', 'minmax'
-                
+
         degree: int
             degree of features interactions to include in the model
 
@@ -96,7 +98,7 @@ def __init__(
         n_clusters=0,
         clustering_method="kmeans",
         cluster_scaling="standard",
-        degree=0
+        degree=0,
     ):
         if n_clusters > 0:
             assert clustering_method in (
@@ -148,7 +150,7 @@ def __init__(
         self.cluster_scaling = cluster_scaling
         self.scaler_, self.label_encoder_, self.clusterer_ = None, None, None
         self.degree = degree
-        self.poly_ = None 
+        self.poly_ = None
 
     def fit(self, X, y, **kwargs):
         """Fit Booster (classifier) to training data (X, y)
@@ -169,9 +171,14 @@ def fit(self, X, y, **kwargs):
             self: object.
         """
 
+        if isinstance(X, pd.DataFrame):
+            X = X.values
+
         if self.degree > 1:
-            self.poly_ = PolynomialFeatures(degree=self.degree, interaction_only=True)
-            X = self.poly_.fit_transform(X.copy())[:,1:]
+            self.poly_ = PolynomialFeatures(
+                degree=self.degree, interaction_only=True, include_bias=False
+            )
+            X = self.poly_.fit_transform(X)
 
         if self.n_clusters > 0:
             clustered_X, self.scaler_, self.label_encoder_, self.clusterer_ = (
@@ -184,7 +191,7 @@ def fit(self, X, y, **kwargs):
                     seed=self.seed,
                 )
             )
-            X = np.column_stack((X.copy(), clustered_X))
+            X = np.column_stack((X, clustered_X))
 
         self.obj = boosterc.fit_booster_classifier(
             np.asarray(X, order="C"),
@@ -243,13 +250,17 @@ def predict_proba(self, X, **kwargs):
 
             probability estimates for test data: {array-like}
         """
-        if self.degree > 0:            
-            X = self.poly_.transform(X.copy())[:,1:]
+
+        if isinstance(X, pd.DataFrame):
+            X = X.values
+
+        if self.degree > 0:
+            X = self.poly_.transform(X)
 
         if self.n_clusters > 0:
             X = np.column_stack(
                 (
-                    X.copy(),
+                    X,
                     cluster(
                         X,
                         training=False,

diff --git a/mlsauce/booster/_booster_regressor.py b/mlsauce/booster/_booster_regressor.py
@@ -2,9 +2,11 @@
     from . import _boosterc as boosterc
 except ImportError:
     import pyximport
+
     pyximport.install()
     import _boosterc as boosterc
 import numpy as np
+import pandas as pd
 import platform
 import warnings
 from sklearn.base import BaseEstimator
@@ -82,7 +84,7 @@ class LSBoostRegressor(BaseEstimator, RegressorMixin):
 
         cluster_scaling: str
             scaling method for clustering: currently 'standard', 'robust', 'minmax'
-        
+
         degree: int
             degree of features interactions to include in the model
 
@@ -110,7 +112,7 @@ def __init__(
         n_clusters=0,
         clustering_method="kmeans",
         cluster_scaling="standard",
-        degree=0
+        degree=0,
     ):
         if n_clusters > 0:
             assert clustering_method in (
@@ -163,7 +165,7 @@ def __init__(
         self.n_clusters = n_clusters
         self.clustering_method = clustering_method
         self.cluster_scaling = cluster_scaling
-        self.scaler_, self.label_encoder_, self.clusterer_ = None, None, None        
+        self.scaler_, self.label_encoder_, self.clusterer_ = None, None, None
         self.degree = degree
         self.poly_ = None
 
@@ -186,9 +188,14 @@ def fit(self, X, y, **kwargs):
             self: object.
         """
 
+        if isinstance(X, pd.DataFrame):
+            X = X.values
+
         if self.degree > 1:
-            self.poly_ = PolynomialFeatures(degree=self.degree, interaction_only=True)
-            X = self.poly_.fit_transform(X.copy())[:,1:]
+            self.poly_ = PolynomialFeatures(
+                degree=self.degree, interaction_only=True, include_bias=False
+            )
+            X = self.poly_.fit_transform(X)
 
         if self.n_clusters > 0:
             clustered_X, self.scaler_, self.label_encoder_, self.clusterer_ = (
@@ -201,7 +208,7 @@ def fit(self, X, y, **kwargs):
                     seed=self.seed,
                 )
             )
-            X = np.column_stack((X.copy(), clustered_X))
+            X = np.column_stack((X, clustered_X))
 
         self.obj = boosterc.fit_booster_regressor(
             X=np.asarray(X, order="C"),
@@ -253,13 +260,17 @@ def predict(self, X, level=95, method=None, **kwargs):
 
             probability estimates for test data: {array-like}
         """
-        if self.degree > 0:                        
-            X = self.poly_.transform(X.copy())[:,1:]
+
+        if isinstance(X, pd.DataFrame):
+            X = X.values
+
+        if self.degree > 0:
+            X = self.poly_.transform(X)
 
         if self.n_clusters > 0:
             X = np.column_stack(
                 (
-                    X.copy(),
+                    X,
                     cluster(
                         X,
                         training=False,

diff --git a/mlsauce/lasso/_lasso.py b/mlsauce/lasso/_lasso.py
@@ -5,10 +5,12 @@
 from sklearn.base import BaseEstimator
 from sklearn.base import RegressorMixin
 from numpy.linalg import inv
-try: 
+
+try:
     from . import _lassoc as mo
 except ImportError:
     import pyximport
+
     pyximport.install()
     import _lassoc as mo
 from ..utils import get_beta

diff --git a/mlsauce/ridge/_ridge.py b/mlsauce/ridge/_ridge.py
@@ -4,10 +4,12 @@
 from sklearn.base import BaseEstimator
 from sklearn.base import RegressorMixin
 from numpy.linalg import inv
-try: 
+
+try:
     from . import _ridgec as mo
 except ImportError:
     import pyximport
+
     pyximport.install()
     import _ridgec as mo
 from ..utils import get_beta

diff --git a/mlsauce/stump/_stump_classifier.py b/mlsauce/stump/_stump_classifier.py
@@ -1,10 +1,12 @@
 import numpy as np
 from sklearn.base import BaseEstimator
 from sklearn.base import ClassifierMixin
-try: 
+
+try:
     from . import _stumpc as stumpc
 except ImportError:
     import pyximport
+
     pyximport.install()
     import _stumpc as stumpc
 

diff --git a/setup.py b/setup.py
@@ -198,8 +198,11 @@ def setup_package():
 
     try: 
         cythonize_ext_modules = cythonize(ext_modules) 
-    except Exception: 
-        cythonize_ext_modules = cythonize(ext_modules2) 
+    except ValueError: 
+        try: 
+            cythonize_ext_modules = cythonize(ext_modules2) 
+        except ValueError:
+            cythonize_ext_modules = []
 
     metadata = dict(name=DISTNAME,
                     maintainer=MAINTAINER,