From 0d7aec20d62cb4d01e54de235fe6a39fcc6853d5 Mon Sep 17 00:00:00 2001 From: Thierry Moudiki Date: Tue, 6 Aug 2024 01:58:56 +0200 Subject: [PATCH] faster (hopefully) Catboost --> v0.5.0 --- setup.py | 2 +- unifiedbooster/gbdt.py | 4 +- unifiedbooster/gbdt_classification.py | 72 +++++++++++++-------------- unifiedbooster/gbdt_regression.py | 72 +++++++++++++-------------- unifiedbooster/gpoptimization.py | 18 ++++--- 5 files changed, 88 insertions(+), 80 deletions(-) diff --git a/setup.py b/setup.py index 52354af..097b4dd 100644 --- a/setup.py +++ b/setup.py @@ -9,7 +9,7 @@ subprocess.check_call(['pip', 'install', 'Cython']) -__version__ = "0.4.2" +__version__ = "0.5.0" here = path.abspath(path.dirname(__file__)) diff --git a/unifiedbooster/gbdt.py b/unifiedbooster/gbdt.py index 07fcbb0..fa472d7 100644 --- a/unifiedbooster/gbdt.py +++ b/unifiedbooster/gbdt.py @@ -90,7 +90,9 @@ def __init__( "depth": self.max_depth, "verbose": self.verbose, "random_seed": self.seed, - "bootstrap_type": "MVS", + "boosting_type": "Plain", + "leaf_estimation_iterations": 1, + "bootstrap_type": "Bernoulli", **kwargs, } elif self.model_type == "gradientboosting": diff --git a/unifiedbooster/gbdt_classification.py b/unifiedbooster/gbdt_classification.py index 7029aaf..d0054bf 100644 --- a/unifiedbooster/gbdt_classification.py +++ b/unifiedbooster/gbdt_classification.py @@ -51,42 +51,42 @@ class GBDTClassifier(GBDT, ClassifierMixin): Examples: - ```python - import unifiedbooster as ub - from sklearn.datasets import load_iris - from sklearn.model_selection import train_test_split - from sklearn.metrics import accuracy_score - - # Load dataset - iris = load_iris() - X, y = iris.data, iris.target - - # Split dataset into training and testing sets - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) - - # Initialize the unified regressor (example with XGBoost) - regressor1 = ub.GBDTClassifier(model_type='xgboost') - #regressor2 = ub.GBDTClassifier(model_type='catboost') - regressor3 = ub.GBDTClassifier(model_type='lightgbm') - - # Fit the model - regressor1.fit(X_train, y_train) - #regressor2.fit(X_train, y_train) - regressor3.fit(X_train, y_train) - - # Predict on the test set - y_pred1 = regressor1.predict(X_test) - #y_pred2 = regressor2.predict(X_test) - y_pred3 = regressor3.predict(X_test) - - # Evaluate the model - accuracy1 = accuracy_score(y_test, y_pred1) - #accuracy2 = accuracy_score(y_test, y_pred2) - accuracy3 = accuracy_score(y_test, y_pred3) - print(f"Classification Accuracy xgboost: {accuracy1:.2f}") - #print(f"Classification Accuracy catboost: {accuracy2:.2f}") - print(f"Classification Accuracy lightgbm: {accuracy3:.2f}") - ``` + ```python + import unifiedbooster as ub + from sklearn.datasets import load_iris + from sklearn.model_selection import train_test_split + from sklearn.metrics import accuracy_score + + # Load dataset + iris = load_iris() + X, y = iris.data, iris.target + + # Split dataset into training and testing sets + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + + # Initialize the unified regressor (example with XGBoost) + regressor1 = ub.GBDTClassifier(model_type='xgboost') + #regressor2 = ub.GBDTClassifier(model_type='catboost') + regressor3 = ub.GBDTClassifier(model_type='lightgbm') + + # Fit the model + regressor1.fit(X_train, y_train) + #regressor2.fit(X_train, y_train) + regressor3.fit(X_train, y_train) + + # Predict on the test set + y_pred1 = regressor1.predict(X_test) + #y_pred2 = regressor2.predict(X_test) + y_pred3 = regressor3.predict(X_test) + + # Evaluate the model + accuracy1 = accuracy_score(y_test, y_pred1) + #accuracy2 = accuracy_score(y_test, y_pred2) + accuracy3 = accuracy_score(y_test, y_pred3) + print(f"Classification Accuracy xgboost: {accuracy1:.2f}") + #print(f"Classification Accuracy catboost: {accuracy2:.2f}") + print(f"Classification Accuracy lightgbm: {accuracy3:.2f}") + ``` """ def __init__( diff --git a/unifiedbooster/gbdt_regression.py b/unifiedbooster/gbdt_regression.py index 0044b2e..fd76e97 100644 --- a/unifiedbooster/gbdt_regression.py +++ b/unifiedbooster/gbdt_regression.py @@ -51,42 +51,42 @@ class GBDTRegressor(GBDT, RegressorMixin): Examples: - ```python - import unifiedbooster as ub - from sklearn.datasets import fetch_california_housing - from sklearn.model_selection import train_test_split - from sklearn.metrics import mean_squared_error - - # Load dataset - housing = fetch_california_housing() - X, y = housing.data, housing.target - - # Split dataset into training and testing sets - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) - - # Initialize the unified regressor (example with XGBoost) - regressor1 = ub.GBDTRegressor(model_type='xgboost') - #regressor2 = ub.GBDTRegressor(model_type='catboost') - regressor3 = ub.GBDTRegressor(model_type='lightgbm') - - # Fit the model - regressor1.fit(X_train, y_train) - #regressor2.fit(X_train, y_train) - regressor3.fit(X_train, y_train) - - # Predict on the test set - y_pred1 = regressor1.predict(X_test) - #y_pred2 = regressor2.predict(X_test) - y_pred3 = regressor3.predict(X_test) - - # Evaluate the model - mse1 = mean_squared_error(y_test, y_pred1) - #mse2 = mean_squared_error(y_test, y_pred2) - mse3 = mean_squared_error(y_test, y_pred3) - print(f"Regression Mean Squared Error xgboost: {mse1:.2f}") - #print(f"Regression Mean Squared Error catboost: {mse2:.2f}") - print(f"Regression Mean Squared Error lightgbm: {mse3:.2f}") - ``` + ```python + import unifiedbooster as ub + from sklearn.datasets import fetch_california_housing + from sklearn.model_selection import train_test_split + from sklearn.metrics import mean_squared_error + + # Load dataset + housing = fetch_california_housing() + X, y = housing.data, housing.target + + # Split dataset into training and testing sets + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + + # Initialize the unified regressor (example with XGBoost) + regressor1 = ub.GBDTRegressor(model_type='xgboost') + #regressor2 = ub.GBDTRegressor(model_type='catboost') + regressor3 = ub.GBDTRegressor(model_type='lightgbm') + + # Fit the model + regressor1.fit(X_train, y_train) + #regressor2.fit(X_train, y_train) + regressor3.fit(X_train, y_train) + + # Predict on the test set + y_pred1 = regressor1.predict(X_test) + #y_pred2 = regressor2.predict(X_test) + y_pred3 = regressor3.predict(X_test) + + # Evaluate the model + mse1 = mean_squared_error(y_test, y_pred1) + #mse2 = mean_squared_error(y_test, y_pred2) + mse3 = mean_squared_error(y_test, y_pred3) + print(f"Regression Mean Squared Error xgboost: {mse1:.2f}") + #print(f"Regression Mean Squared Error catboost: {mse2:.2f}") + print(f"Regression Mean Squared Error lightgbm: {mse3:.2f}") + ``` """ def __init__( diff --git a/unifiedbooster/gpoptimization.py b/unifiedbooster/gpoptimization.py index 1223ec7..52b567a 100644 --- a/unifiedbooster/gpoptimization.py +++ b/unifiedbooster/gpoptimization.py @@ -325,9 +325,14 @@ def lazy_cross_val_optim( Examples: ```python + import os import unifiedbooster as ub from sklearn.datasets import load_breast_cancer from sklearn.model_selection import train_test_split + from sklearn.metrics import accuracy_score + from time import time + + print(f"\n ----- Running: {os.path.basename(__file__)}... ----- \n") dataset = load_breast_cancer() X, y = dataset.data, dataset.target @@ -335,25 +340,26 @@ def lazy_cross_val_optim( X, y, test_size=0.2, random_state=42 ) - res1 = ub.cross_val_optim( + start = time() + res4 = ub.lazy_cross_val_optim( X_train, y_train, - X_test=None, - y_test=None, + X_test=X_test, + y_test=y_test, model_type="lightgbm", type_fit="classification", scoring="accuracy", n_estimators=100, - surrogate_obj=None, cv=5, n_jobs=None, n_init=10, n_iter=190, abs_tol=1e-3, - verbose=2, seed=123, + customize=False ) - print(res1) + print(f"Elapsed: {time()-start}") + print(res4) ``` """