From b154106ac8d3d91add52b09bb20d8e3bd562ef0e Mon Sep 17 00:00:00 2001 From: Thierry Moudiki Date: Sun, 4 Aug 2024 19:42:15 +0200 Subject: [PATCH] add cross val. and lazy cross val. --- examples/classification.py | 26 +++++++++++++++----------- examples/regression.py | 26 +++++++++++++++----------- setup.py | 3 +-- unifiedbooster/__init__.py | 4 +++- unifiedbooster/gbdt.py | 6 +++--- unifiedbooster/gbdt_classification.py | 17 +++++++++++------ unifiedbooster/gbdt_regression.py | 17 +++++++++++------ 7 files changed, 59 insertions(+), 40 deletions(-) diff --git a/examples/classification.py b/examples/classification.py index 2bfc0b8..12334e2 100644 --- a/examples/classification.py +++ b/examples/classification.py @@ -1,43 +1,47 @@ +import os import unifiedbooster as ub from sklearn.datasets import load_iris from sklearn.model_selection import train_test_split, cross_val_score from sklearn.metrics import accuracy_score +print(f"\n ----- Running: {os.path.basename(__file__)}... ----- \n") + # Load dataset iris = load_iris() X, y = iris.data, iris.target # Split dataset into training and testing sets -X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) +X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.2, random_state=42 +) # Initialize the unified clf (example with XGBoost) -clf1 = ub.GBDTClassifier(model_type='xgboost') -#clf2 = ub.GBDTClassifier(model_type='catboost') -clf3 = ub.GBDTClassifier(model_type='lightgbm') -clf4 = ub.GBDTClassifier(model_type='gradientboosting', - colsample=0.9) +clf1 = ub.GBDTClassifier(model_type="xgboost") +# clf2 = ub.GBDTClassifier(model_type='catboost') +clf3 = ub.GBDTClassifier(model_type="lightgbm") +clf4 = ub.GBDTClassifier(model_type="gradientboosting", colsample=0.9) # Fit the model clf1.fit(X_train, y_train) -#clf2.fit(X_train, y_train) +# clf2.fit(X_train, y_train) clf3.fit(X_train, y_train) clf4.fit(X_train, y_train) # Predict on the test set y_pred1 = clf1.predict(X_test) -#y_pred2 = clf2.predict(X_test) +# y_pred2 = clf2.predict(X_test) y_pred3 = clf3.predict(X_test) y_pred4 = clf4.predict(X_test) # Evaluate the model accuracy1 = accuracy_score(y_test, y_pred1) -#accuracy2 = accuracy_score(y_test, y_pred2) +# accuracy2 = accuracy_score(y_test, y_pred2) accuracy3 = accuracy_score(y_test, y_pred3) accuracy4 = accuracy_score(y_test, y_pred4) print(f"Classification Accuracy xgboost: {accuracy1:.2f}") -#print(f"Classification Accuracy catboost: {accuracy2:.2f}") +# print(f"Classification Accuracy catboost: {accuracy2:.2f}") print(f"Classification Accuracy lightgbm: {accuracy3:.2f}") print(f"Classification Accuracy gradientboosting: {accuracy4:.2f}") print(f"CV xgboost: {cross_val_score(clf1, X_train, y_train)}") print(f"CV lightgbm: {cross_val_score(clf3, X_train, y_train)}") -print(f"CV gradientboosting: {cross_val_score(clf4, X_train, y_train)}") \ No newline at end of file +print(f"CV gradientboosting: {cross_val_score(clf4, X_train, y_train)}") diff --git a/examples/regression.py b/examples/regression.py index 8d041a9..011d958 100644 --- a/examples/regression.py +++ b/examples/regression.py @@ -1,43 +1,47 @@ +import os import unifiedbooster as ub from sklearn.datasets import fetch_california_housing from sklearn.model_selection import train_test_split, cross_val_score from sklearn.metrics import mean_squared_error +print(f"\n ----- Running: {os.path.basename(__file__)}... ----- \n") + # Load dataset housing = fetch_california_housing() X, y = housing.data, housing.target # Split dataset into training and testing sets -X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) +X_train, X_test, y_train, y_test = train_test_split( + X, y, test_size=0.2, random_state=42 +) # Initialize the unified regr (example with XGBoost) -regr1 = ub.GBDTRegressor(model_type='xgboost') -#regr2 = ub.GBDTRegressor(model_type='catboost') -regr3 = ub.GBDTRegressor(model_type='lightgbm') -regr4 = ub.GBDTRegressor(model_type='gradientboosting', - colsample=0.9) +regr1 = ub.GBDTRegressor(model_type="xgboost") +# regr2 = ub.GBDTRegressor(model_type='catboost') +regr3 = ub.GBDTRegressor(model_type="lightgbm") +regr4 = ub.GBDTRegressor(model_type="gradientboosting", colsample=0.9) # Fit the model regr1.fit(X_train, y_train) -#regr2.fit(X_train, y_train) +# regr2.fit(X_train, y_train) regr3.fit(X_train, y_train) regr4.fit(X_train, y_train) # Predict on the test set y_pred1 = regr1.predict(X_test) -#y_pred2 = regr2.predict(X_test) +# y_pred2 = regr2.predict(X_test) y_pred3 = regr3.predict(X_test) y_pred4 = regr4.predict(X_test) # Evaluate the model mse1 = mean_squared_error(y_test, y_pred1) -#mse2 = mean_squared_error(y_test, y_pred2) +# mse2 = mean_squared_error(y_test, y_pred2) mse3 = mean_squared_error(y_test, y_pred3) mse4 = mean_squared_error(y_test, y_pred4) print(f"Regression Mean Squared Error xgboost: {mse1:.2f}") -#print(f"Regression Mean Squared Error catboost: {mse2:.2f}") +# print(f"Regression Mean Squared Error catboost: {mse2:.2f}") print(f"Regression Mean Squared Error lightgbm: {mse3:.2f}") print(f"Regression Mean Squared Error gradientboosting: {mse4:.2f}") print(f"CV xgboost: {cross_val_score(regr1, X_train, y_train)}") print(f"CV lightgbm: {cross_val_score(regr3, X_train, y_train)}") -print(f"CV gradientboosting: {cross_val_score(regr4, X_train, y_train)}") \ No newline at end of file +print(f"CV gradientboosting: {cross_val_score(regr4, X_train, y_train)}") diff --git a/setup.py b/setup.py index 2e33748..6d7f140 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,6 @@ """The setup script.""" -import platform import subprocess from setuptools import setup, find_packages from codecs import open @@ -10,7 +9,7 @@ subprocess.check_call(['pip', 'install', 'Cython']) -__version__ = "0.3.0" +__version__ = "0.4.0" here = path.abspath(path.dirname(__file__)) diff --git a/unifiedbooster/__init__.py b/unifiedbooster/__init__.py index 4e1106b..f322b65 100644 --- a/unifiedbooster/__init__.py +++ b/unifiedbooster/__init__.py @@ -1,5 +1,7 @@ from .gbdt import GBDT from .gbdt_classification import GBDTClassifier from .gbdt_regression import GBDTRegressor +from .gpopt import cross_val_optim, lazy_cross_val_optim -__all__ = ["GBDT", "GBDTClassifier", "GBDTRegressor"] +__all__ = ["GBDT", "GBDTClassifier", "GBDTRegressor", + "cross_val_optim", "lazy_cross_val_optim"] diff --git a/unifiedbooster/gbdt.py b/unifiedbooster/gbdt.py index 871761d..07fcbb0 100644 --- a/unifiedbooster/gbdt.py +++ b/unifiedbooster/gbdt.py @@ -16,8 +16,8 @@ class GBDT(BaseEstimator): learning_rate: float shrinkage rate; used for reducing the gradient step - - max_depth: int + + max_depth: int maximum tree depth rowsample: float @@ -90,7 +90,7 @@ def __init__( "depth": self.max_depth, "verbose": self.verbose, "random_seed": self.seed, - "bootstrap_type": "Bernoulli", + "bootstrap_type": "MVS", **kwargs, } elif self.model_type == "gradientboosting": diff --git a/unifiedbooster/gbdt_classification.py b/unifiedbooster/gbdt_classification.py index 918f9eb..0e9eca0 100644 --- a/unifiedbooster/gbdt_classification.py +++ b/unifiedbooster/gbdt_classification.py @@ -1,12 +1,17 @@ from .gbdt import GBDT from sklearn.base import ClassifierMixin -from xgboost import XGBClassifier - +try: + from xgboost import XGBClassifier +except: + pass try: from catboost import CatBoostClassifier except: - print("catboost package can't be built") -from lightgbm import LGBMClassifier + pass +try: + from lightgbm import LGBMClassifier +except: + pass from sklearn.ensemble import GradientBoostingClassifier @@ -24,8 +29,8 @@ class GBDTClassifier(GBDT, ClassifierMixin): learning_rate: float shrinkage rate; used for reducing the gradient step - - max_depth: int + + max_depth: int maximum tree depth rowsample: float diff --git a/unifiedbooster/gbdt_regression.py b/unifiedbooster/gbdt_regression.py index c3c6dc3..7df6452 100644 --- a/unifiedbooster/gbdt_regression.py +++ b/unifiedbooster/gbdt_regression.py @@ -1,12 +1,17 @@ from .gbdt import GBDT from sklearn.base import RegressorMixin -from xgboost import XGBRegressor - +try: + from xgboost import XGBRegressor +except: + pass try: from catboost import CatBoostRegressor except: - print("catboost package can't be built") -from lightgbm import LGBMRegressor + pass +try: + from lightgbm import LGBMRegressor +except: + pass from sklearn.ensemble import GradientBoostingRegressor @@ -24,8 +29,8 @@ class GBDTRegressor(GBDT, RegressorMixin): learning_rate: float shrinkage rate; used for reducing the gradient step - - max_depth: int + + max_depth: int maximum tree depth rowsample: float