From b154106ac8d3d91add52b09bb20d8e3bd562ef0e Mon Sep 17 00:00:00 2001
From: Thierry Moudiki <thierry.moudiki@gmail.com>
Date: Sun, 4 Aug 2024 19:42:15 +0200
Subject: [PATCH] Add cross-validation and lazy cross-validation (cross_val_optim, lazy_cross_val_optim)

---
 examples/classification.py            | 26 +++++++++++++++-----------
 examples/regression.py                | 26 +++++++++++++++-----------
 setup.py                              |  3 +--
 unifiedbooster/__init__.py            |  4 +++-
 unifiedbooster/gbdt.py                |  6 +++---
 unifiedbooster/gbdt_classification.py | 17 +++++++++++------
 unifiedbooster/gbdt_regression.py     | 17 +++++++++++------
 7 files changed, 59 insertions(+), 40 deletions(-)

diff --git a/examples/classification.py b/examples/classification.py
index 2bfc0b8..12334e2 100644
--- a/examples/classification.py
+++ b/examples/classification.py
@@ -1,43 +1,47 @@
+import os 
 import unifiedbooster as ub
 from sklearn.datasets import load_iris
 from sklearn.model_selection import train_test_split, cross_val_score
 from sklearn.metrics import accuracy_score
 
+print(f"\n ----- Running: {os.path.basename(__file__)}... ----- \n")
+
 # Load dataset
 iris = load_iris()
 X, y = iris.data, iris.target
 
 # Split dataset into training and testing sets
-X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+X_train, X_test, y_train, y_test = train_test_split(
+    X, y, test_size=0.2, random_state=42
+)
 
 # Initialize the unified clf (example with XGBoost)
-clf1 = ub.GBDTClassifier(model_type='xgboost')
-#clf2 = ub.GBDTClassifier(model_type='catboost')
-clf3 = ub.GBDTClassifier(model_type='lightgbm')
-clf4 = ub.GBDTClassifier(model_type='gradientboosting', 
-                         colsample=0.9)
+clf1 = ub.GBDTClassifier(model_type="xgboost")
+# clf2 = ub.GBDTClassifier(model_type='catboost')
+clf3 = ub.GBDTClassifier(model_type="lightgbm")
+clf4 = ub.GBDTClassifier(model_type="gradientboosting", colsample=0.9)
 
 # Fit the model
 clf1.fit(X_train, y_train)
-#clf2.fit(X_train, y_train)
+# clf2.fit(X_train, y_train)
 clf3.fit(X_train, y_train)
 clf4.fit(X_train, y_train)
 
 # Predict on the test set
 y_pred1 = clf1.predict(X_test)
-#y_pred2 = clf2.predict(X_test)
+# y_pred2 = clf2.predict(X_test)
 y_pred3 = clf3.predict(X_test)
 y_pred4 = clf4.predict(X_test)
 
 # Evaluate the model
 accuracy1 = accuracy_score(y_test, y_pred1)
-#accuracy2 = accuracy_score(y_test, y_pred2)
+# accuracy2 = accuracy_score(y_test, y_pred2)
 accuracy3 = accuracy_score(y_test, y_pred3)
 accuracy4 = accuracy_score(y_test, y_pred4)
 print(f"Classification Accuracy xgboost: {accuracy1:.2f}")
-#print(f"Classification Accuracy catboost: {accuracy2:.2f}")
+# print(f"Classification Accuracy catboost: {accuracy2:.2f}")
 print(f"Classification Accuracy lightgbm: {accuracy3:.2f}")
 print(f"Classification Accuracy gradientboosting: {accuracy4:.2f}")
 print(f"CV xgboost: {cross_val_score(clf1, X_train, y_train)}")
 print(f"CV lightgbm: {cross_val_score(clf3, X_train, y_train)}")
-print(f"CV gradientboosting: {cross_val_score(clf4, X_train, y_train)}")
\ No newline at end of file
+print(f"CV gradientboosting: {cross_val_score(clf4, X_train, y_train)}")
diff --git a/examples/regression.py b/examples/regression.py
index 8d041a9..011d958 100644
--- a/examples/regression.py
+++ b/examples/regression.py
@@ -1,43 +1,47 @@
+import os 
 import unifiedbooster as ub
 from sklearn.datasets import fetch_california_housing
 from sklearn.model_selection import train_test_split, cross_val_score
 from sklearn.metrics import mean_squared_error
 
+print(f"\n ----- Running: {os.path.basename(__file__)}... ----- \n")
+
 # Load dataset
 housing = fetch_california_housing()
 X, y = housing.data, housing.target
 
 # Split dataset into training and testing sets
-X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+X_train, X_test, y_train, y_test = train_test_split(
+    X, y, test_size=0.2, random_state=42
+)
 
 # Initialize the unified regr (example with XGBoost)
-regr1 = ub.GBDTRegressor(model_type='xgboost')
-#regr2 = ub.GBDTRegressor(model_type='catboost')
-regr3 = ub.GBDTRegressor(model_type='lightgbm')
-regr4 = ub.GBDTRegressor(model_type='gradientboosting', 
-                    colsample=0.9)
+regr1 = ub.GBDTRegressor(model_type="xgboost")
+# regr2 = ub.GBDTRegressor(model_type='catboost')
+regr3 = ub.GBDTRegressor(model_type="lightgbm")
+regr4 = ub.GBDTRegressor(model_type="gradientboosting", colsample=0.9)
 
 # Fit the model
 regr1.fit(X_train, y_train)
-#regr2.fit(X_train, y_train)
+# regr2.fit(X_train, y_train)
 regr3.fit(X_train, y_train)
 regr4.fit(X_train, y_train)
 
 # Predict on the test set
 y_pred1 = regr1.predict(X_test)
-#y_pred2 = regr2.predict(X_test)
+# y_pred2 = regr2.predict(X_test)
 y_pred3 = regr3.predict(X_test)
 y_pred4 = regr4.predict(X_test)
 
 # Evaluate the model
 mse1 = mean_squared_error(y_test, y_pred1)
-#mse2 = mean_squared_error(y_test, y_pred2)
+# mse2 = mean_squared_error(y_test, y_pred2)
 mse3 = mean_squared_error(y_test, y_pred3)
 mse4 = mean_squared_error(y_test, y_pred4)
 print(f"Regression Mean Squared Error xgboost: {mse1:.2f}")
-#print(f"Regression Mean Squared Error catboost: {mse2:.2f}")
+# print(f"Regression Mean Squared Error catboost: {mse2:.2f}")
 print(f"Regression Mean Squared Error lightgbm: {mse3:.2f}")
 print(f"Regression Mean Squared Error gradientboosting: {mse4:.2f}")
 print(f"CV xgboost: {cross_val_score(regr1, X_train, y_train)}")
 print(f"CV lightgbm: {cross_val_score(regr3, X_train, y_train)}")
-print(f"CV gradientboosting: {cross_val_score(regr4, X_train, y_train)}")
\ No newline at end of file
+print(f"CV gradientboosting: {cross_val_score(regr4, X_train, y_train)}")
diff --git a/setup.py b/setup.py
index 2e33748..6d7f140 100644
--- a/setup.py
+++ b/setup.py
@@ -2,7 +2,6 @@
 
 """The setup script."""
 
-import platform
 import subprocess
 from setuptools import setup, find_packages
 from codecs import open
@@ -10,7 +9,7 @@
 
 subprocess.check_call(['pip', 'install', 'Cython'])
 
-__version__ = "0.3.0"
+__version__ = "0.4.0"
 
 here = path.abspath(path.dirname(__file__))
 
diff --git a/unifiedbooster/__init__.py b/unifiedbooster/__init__.py
index 4e1106b..f322b65 100644
--- a/unifiedbooster/__init__.py
+++ b/unifiedbooster/__init__.py
@@ -1,5 +1,7 @@
 from .gbdt import GBDT
 from .gbdt_classification import GBDTClassifier
 from .gbdt_regression import GBDTRegressor
+from .gpopt import cross_val_optim, lazy_cross_val_optim
 
-__all__ = ["GBDT", "GBDTClassifier", "GBDTRegressor"]
+__all__ = ["GBDT", "GBDTClassifier", "GBDTRegressor", 
+           "cross_val_optim", "lazy_cross_val_optim"]
diff --git a/unifiedbooster/gbdt.py b/unifiedbooster/gbdt.py
index 871761d..07fcbb0 100644
--- a/unifiedbooster/gbdt.py
+++ b/unifiedbooster/gbdt.py
@@ -16,8 +16,8 @@ class GBDT(BaseEstimator):
 
         learning_rate: float
             shrinkage rate; used for reducing the gradient step
-        
-        max_depth: int 
+
+        max_depth: int
             maximum tree depth
 
         rowsample: float
@@ -90,7 +90,7 @@ def __init__(
                 "depth": self.max_depth,
                 "verbose": self.verbose,
                 "random_seed": self.seed,
-                "bootstrap_type": "Bernoulli",
+                "bootstrap_type": "MVS",
                 **kwargs,
             }
         elif self.model_type == "gradientboosting":
diff --git a/unifiedbooster/gbdt_classification.py b/unifiedbooster/gbdt_classification.py
index 918f9eb..0e9eca0 100644
--- a/unifiedbooster/gbdt_classification.py
+++ b/unifiedbooster/gbdt_classification.py
@@ -1,12 +1,17 @@
 from .gbdt import GBDT
 from sklearn.base import ClassifierMixin
-from xgboost import XGBClassifier
-
+try:
+    from xgboost import XGBClassifier
+except Exception:  # optional backend; don't swallow KeyboardInterrupt/SystemExit
+    pass
 try:
     from catboost import CatBoostClassifier
 except:
-    print("catboost package can't be built")
-from lightgbm import LGBMClassifier
+    pass
+try:
+    from lightgbm import LGBMClassifier
+except Exception:  # optional backend; don't swallow KeyboardInterrupt/SystemExit
+    pass
 from sklearn.ensemble import GradientBoostingClassifier
 
 
@@ -24,8 +29,8 @@ class GBDTClassifier(GBDT, ClassifierMixin):
 
         learning_rate: float
             shrinkage rate; used for reducing the gradient step
-        
-        max_depth: int 
+
+        max_depth: int
             maximum tree depth
 
         rowsample: float
diff --git a/unifiedbooster/gbdt_regression.py b/unifiedbooster/gbdt_regression.py
index c3c6dc3..7df6452 100644
--- a/unifiedbooster/gbdt_regression.py
+++ b/unifiedbooster/gbdt_regression.py
@@ -1,12 +1,17 @@
 from .gbdt import GBDT
 from sklearn.base import RegressorMixin
-from xgboost import XGBRegressor
-
+try:
+    from xgboost import XGBRegressor
+except Exception:  # optional backend; don't swallow KeyboardInterrupt/SystemExit
+    pass
 try:
     from catboost import CatBoostRegressor
 except:
-    print("catboost package can't be built")
-from lightgbm import LGBMRegressor
+    pass 
+try:
+    from lightgbm import LGBMRegressor
+except Exception:  # optional backend; don't swallow KeyboardInterrupt/SystemExit
+    pass
 from sklearn.ensemble import GradientBoostingRegressor
 
 
@@ -24,8 +29,8 @@ class GBDTRegressor(GBDT, RegressorMixin):
 
         learning_rate: float
             shrinkage rate; used for reducing the gradient step
-        
-        max_depth: int 
+
+        max_depth: int
             maximum tree depth
 
         rowsample: float