diff --git a/CHANGES.md b/CHANGES.md index 1d58cd2..1eab268 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,7 @@ +# version 0.15.0 + +- Can use pandas DataFrame in `LSBoostRegressor`, `LSBoostClassifier` and `AdaOpt` + # version 0.14.0 - add polynomial (interactions) features to `LSBoostRegressor` and `LSBoostClassifier` diff --git a/mlsauce.egg-info/PKG-INFO b/mlsauce.egg-info/PKG-INFO deleted file mode 100644 index 9d3abad..0000000 --- a/mlsauce.egg-info/PKG-INFO +++ /dev/null @@ -1,36 +0,0 @@ -Metadata-Version: 2.1 -Name: mlsauce -Version: 0.13.1 -Summary: Miscellaneous Statistical/Machine Learning tools -Maintainer: T. Moudiki -Maintainer-email: thierry.moudiki@gmail.com -License: BSD3 Clause Clear -Platform: linux -Platform: macosx -Platform: windows -Classifier: Development Status :: 2 - Pre-Alpha -Classifier: Intended Audience :: Developers -Classifier: License :: OSI Approved :: BSD License -Classifier: Natural Language :: English -Classifier: Programming Language :: Python :: 3 -Classifier: Programming Language :: Python :: 3.5 -Classifier: Programming Language :: Python :: 3.6 -Classifier: Programming Language :: Python :: 3.7 -Classifier: Programming Language :: Python :: 3.8 -Requires-Python: >=3.5 -License-File: LICENSE -Requires-Dist: numpy -Requires-Dist: Cython -Requires-Dist: joblib -Requires-Dist: pandas -Requires-Dist: requests -Requires-Dist: scikit-learn -Requires-Dist: scipy -Requires-Dist: tqdm -Requires-Dist: jax -Requires-Dist: jaxlib -Provides-Extra: alldeps -Requires-Dist: numpy>=1.13.0; extra == "alldeps" -Requires-Dist: scipy>=0.19.0; extra == "alldeps" - -Miscellaneous Statistical/Machine Learning tools diff --git a/mlsauce.egg-info/SOURCES.txt b/mlsauce.egg-info/SOURCES.txt deleted file mode 100644 index 30eae25..0000000 --- a/mlsauce.egg-info/SOURCES.txt +++ /dev/null @@ -1,59 +0,0 @@ -LICENSE -README.md -setup.cfg -setup.py -mlsauce/__init__.py -mlsauce/_config.py -mlsauce/setup.py -mlsauce.egg-info/PKG-INFO -mlsauce.egg-info/SOURCES.txt -mlsauce.egg-info/dependency_links.txt -mlsauce.egg-info/not-zip-safe -mlsauce.egg-info/requires.txt -mlsauce.egg-info/top_level.txt -mlsauce/adaopt/__init__.py -mlsauce/adaopt/_adaopt.py -mlsauce/adaopt/_adaoptc.c -mlsauce/adaopt/setup.py -mlsauce/booster/__init__.py -mlsauce/booster/_booster_classifier.py -mlsauce/booster/_booster_regressor.py -mlsauce/booster/_boosterc.c -mlsauce/booster/setup.py -mlsauce/datasets/__init__.py -mlsauce/datasets/dowload.py -mlsauce/encoders/__init__.py -mlsauce/encoders/target_encoders.py -mlsauce/lasso/__init__.py -mlsauce/lasso/_lasso.py -mlsauce/lasso/_lassoc.c -mlsauce/lasso/setup.py -mlsauce/nonconformist/__init__.py -mlsauce/nonconformist/acp.py -mlsauce/nonconformist/base.py -mlsauce/nonconformist/cp.py -mlsauce/nonconformist/evaluation.py -mlsauce/nonconformist/icp.py -mlsauce/nonconformist/nc.py -mlsauce/nonconformist/util.py -mlsauce/predictioninterval/__init__.py -mlsauce/predictioninterval/predictioninterval.py -mlsauce/ridge/__init__.py -mlsauce/ridge/_ridge.py -mlsauce/ridge/_ridgec.c -mlsauce/ridge/setup.py -mlsauce/stump/__init__.py -mlsauce/stump/_stump_classifier.py -mlsauce/stump/_stumpc.c -mlsauce/stump/setup.py -mlsauce/tests/__init__.py -mlsauce/tests/test_adaopt.py -mlsauce/utils/__init__.py -mlsauce/utils/get_beta.py -mlsauce/utils/progress_bar.py -mlsauce/utils/memoryuse/__init__.py -mlsauce/utils/memoryuse/mem_usage.py -mlsauce/utils/misc/__init__.py -mlsauce/utils/misc/misc.py -mlsauce/utils/sampling/__init__.py -mlsauce/utils/sampling/rowsubsampling.py \ No newline at end of file diff --git a/mlsauce.egg-info/dependency_links.txt b/mlsauce.egg-info/dependency_links.txt deleted file mode 100644 index 8b13789..0000000 --- a/mlsauce.egg-info/dependency_links.txt +++ /dev/null @@ -1 +0,0 @@ - diff --git a/mlsauce.egg-info/not-zip-safe b/mlsauce.egg-info/not-zip-safe deleted file mode 100644 index 8b13789..0000000 --- a/mlsauce.egg-info/not-zip-safe +++ /dev/null @@ -1 +0,0 @@ - diff --git a/mlsauce.egg-info/requires.txt b/mlsauce.egg-info/requires.txt deleted file mode 100644 index b3f1862..0000000 --- a/mlsauce.egg-info/requires.txt +++ /dev/null @@ -1,14 +0,0 @@ -numpy -Cython -joblib -pandas -requests -scikit-learn -scipy -tqdm -jax -jaxlib - -[alldeps] -numpy>=1.13.0 -scipy>=0.19.0 diff --git a/mlsauce.egg-info/top_level.txt b/mlsauce.egg-info/top_level.txt deleted file mode 100644 index 2ccd09c..0000000 --- a/mlsauce.egg-info/top_level.txt +++ /dev/null @@ -1 +0,0 @@ -mlsauce diff --git a/mlsauce/adaopt/__init__.py b/mlsauce/adaopt/__init__.py index e1bd761..b6a1538 100644 --- a/mlsauce/adaopt/__init__.py +++ b/mlsauce/adaopt/__init__.py @@ -1,6 +1,3 @@ -try: - from ._adaopt import AdaOpt -except ImportError: - pass +from ._adaopt import AdaOpt __all__ = ["AdaOpt"] diff --git a/mlsauce/adaopt/_adaopt.py b/mlsauce/adaopt/_adaopt.py index f04de96..d115902 100644 --- a/mlsauce/adaopt/_adaopt.py +++ b/mlsauce/adaopt/_adaopt.py @@ -1,5 +1,6 @@ -import numpy as np import pickle +import numpy as np +import pandas as pd from joblib import Parallel, delayed from joblib import wrap_non_picklable_objects from sklearn.base import BaseEstimator @@ -17,6 +18,7 @@ pyximport.install() import _adaoptc as adaoptc + class AdaOpt(BaseEstimator, ClassifierMixin): """AdaOpt classifier. @@ -170,6 +172,9 @@ def fit(self, X, y, **kwargs): """ + if isinstance(X, pd.DataFrame): + X = X.values + if self.n_clusters_input > 0: clustered_X, self.scaler_, self.label_encoder_, self.clusterer_ = ( cluster( @@ -259,6 +264,9 @@ def predict_proba(self, X, **kwargs): """ + if isinstance(X, pd.DataFrame): + X = X.values + n_train, p_train = self.scaled_X_train.shape if self.n_clusters_input > 0: diff --git a/mlsauce/booster/_booster_classifier.py b/mlsauce/booster/_booster_classifier.py index 45ef72f..1e7151e 100644 --- a/mlsauce/booster/_booster_classifier.py +++ b/mlsauce/booster/_booster_classifier.py @@ -2,9 +2,11 @@ from . import _boosterc as boosterc except ImportError: import pyximport + pyximport.install() import _boosterc as boosterc import numpy as np +import pandas as pd import platform import warnings from sklearn.preprocessing import PolynomialFeatures @@ -71,7 +73,7 @@ class LSBoostClassifier(BaseEstimator, ClassifierMixin): cluster_scaling: str scaling method for clustering: currently 'standard', 'robust', 'minmax' - + degree: int degree of features interactions to include in the model @@ -96,7 +98,7 @@ def __init__( n_clusters=0, clustering_method="kmeans", cluster_scaling="standard", - degree=0 + degree=0, ): if n_clusters > 0: assert clustering_method in ( @@ -148,7 +150,7 @@ def __init__( self.cluster_scaling = cluster_scaling self.scaler_, self.label_encoder_, self.clusterer_ = None, None, None self.degree = degree - self.poly_ = None + self.poly_ = None def fit(self, X, y, **kwargs): """Fit Booster (classifier) to training data (X, y) @@ -169,9 +171,14 @@ def fit(self, X, y, **kwargs): self: object. """ + if isinstance(X, pd.DataFrame): + X = X.values + if self.degree > 1: - self.poly_ = PolynomialFeatures(degree=self.degree, interaction_only=True) - X = self.poly_.fit_transform(X.copy())[:,1:] + self.poly_ = PolynomialFeatures( + degree=self.degree, interaction_only=True, include_bias=False + ) + X = self.poly_.fit_transform(X) if self.n_clusters > 0: clustered_X, self.scaler_, self.label_encoder_, self.clusterer_ = ( @@ -184,7 +191,7 @@ def fit(self, X, y, **kwargs): seed=self.seed, ) ) - X = np.column_stack((X.copy(), clustered_X)) + X = np.column_stack((X, clustered_X)) self.obj = boosterc.fit_booster_classifier( np.asarray(X, order="C"), @@ -243,13 +250,17 @@ def predict_proba(self, X, **kwargs): probability estimates for test data: {array-like} """ - if self.degree > 0: - X = self.poly_.transform(X.copy())[:,1:] + + if isinstance(X, pd.DataFrame): + X = X.values + + if self.degree > 0: + X = self.poly_.transform(X) if self.n_clusters > 0: X = np.column_stack( ( - X.copy(), + X, cluster( X, training=False, diff --git a/mlsauce/booster/_booster_regressor.py b/mlsauce/booster/_booster_regressor.py index 7197625..57e74c0 100644 --- a/mlsauce/booster/_booster_regressor.py +++ b/mlsauce/booster/_booster_regressor.py @@ -2,9 +2,11 @@ from . import _boosterc as boosterc except ImportError: import pyximport + pyximport.install() import _boosterc as boosterc import numpy as np +import pandas as pd import platform import warnings from sklearn.base import BaseEstimator @@ -82,7 +84,7 @@ class LSBoostRegressor(BaseEstimator, RegressorMixin): cluster_scaling: str scaling method for clustering: currently 'standard', 'robust', 'minmax' - + degree: int degree of features interactions to include in the model @@ -110,7 +112,7 @@ def __init__( n_clusters=0, clustering_method="kmeans", cluster_scaling="standard", - degree=0 + degree=0, ): if n_clusters > 0: assert clustering_method in ( @@ -163,7 +165,7 @@ def __init__( self.n_clusters = n_clusters self.clustering_method = clustering_method self.cluster_scaling = cluster_scaling - self.scaler_, self.label_encoder_, self.clusterer_ = None, None, None + self.scaler_, self.label_encoder_, self.clusterer_ = None, None, None self.degree = degree self.poly_ = None @@ -186,9 +188,14 @@ def fit(self, X, y, **kwargs): self: object. """ + if isinstance(X, pd.DataFrame): + X = X.values + if self.degree > 1: - self.poly_ = PolynomialFeatures(degree=self.degree, interaction_only=True) - X = self.poly_.fit_transform(X.copy())[:,1:] + self.poly_ = PolynomialFeatures( + degree=self.degree, interaction_only=True, include_bias=False + ) + X = self.poly_.fit_transform(X) if self.n_clusters > 0: clustered_X, self.scaler_, self.label_encoder_, self.clusterer_ = ( @@ -201,7 +208,7 @@ def fit(self, X, y, **kwargs): seed=self.seed, ) ) - X = np.column_stack((X.copy(), clustered_X)) + X = np.column_stack((X, clustered_X)) self.obj = boosterc.fit_booster_regressor( X=np.asarray(X, order="C"), @@ -253,13 +260,17 @@ def predict(self, X, level=95, method=None, **kwargs): probability estimates for test data: {array-like} """ - if self.degree > 0: - X = self.poly_.transform(X.copy())[:,1:] + + if isinstance(X, pd.DataFrame): + X = X.values + + if self.degree > 0: + X = self.poly_.transform(X) if self.n_clusters > 0: X = np.column_stack( ( - X.copy(), + X, cluster( X, training=False, diff --git a/mlsauce/lasso/_lasso.py b/mlsauce/lasso/_lasso.py index f2cc877..dd9c52a 100644 --- a/mlsauce/lasso/_lasso.py +++ b/mlsauce/lasso/_lasso.py @@ -5,10 +5,12 @@ from sklearn.base import BaseEstimator from sklearn.base import RegressorMixin from numpy.linalg import inv -try: + +try: from . import _lassoc as mo except ImportError: import pyximport + pyximport.install() import _lassoc as mo from ..utils import get_beta diff --git a/mlsauce/ridge/_ridge.py b/mlsauce/ridge/_ridge.py index 0dd571d..182340c 100644 --- a/mlsauce/ridge/_ridge.py +++ b/mlsauce/ridge/_ridge.py @@ -4,10 +4,12 @@ from sklearn.base import BaseEstimator from sklearn.base import RegressorMixin from numpy.linalg import inv -try: + +try: from . import _ridgec as mo except ImportError: import pyximport + pyximport.install() import _ridgec as mo from ..utils import get_beta diff --git a/mlsauce/stump/_stump_classifier.py b/mlsauce/stump/_stump_classifier.py index e2d6882..eed622a 100644 --- a/mlsauce/stump/_stump_classifier.py +++ b/mlsauce/stump/_stump_classifier.py @@ -1,10 +1,12 @@ import numpy as np from sklearn.base import BaseEstimator from sklearn.base import ClassifierMixin -try: + +try: from . import _stumpc as stumpc except ImportError: import pyximport + pyximport.install() import _stumpc as stumpc diff --git a/setup.py b/setup.py index 6e83b47..c470544 100644 --- a/setup.py +++ b/setup.py @@ -198,8 +198,11 @@ def setup_package(): try: cythonize_ext_modules = cythonize(ext_modules) - except Exception: - cythonize_ext_modules = cythonize(ext_modules2) + except ValueError: + try: + cythonize_ext_modules = cythonize(ext_modules2) + except ValueError: + cythonize_ext_modules = [] metadata = dict(name=DISTNAME, maintainer=MAINTAINER,