Skip to content

Commit

Permalink
Add cross-validation and lazy cross-validation optimization.
Browse files · Browse the repository at this point in the history
  • Loading branch information
thierrymoudiki committed Aug 4, 2024
1 parent f05db7b commit b154106
Show file tree
Hide file tree
Showing 7 changed files with 59 additions and 40 deletions.
26 changes: 15 additions & 11 deletions examples/classification.py
Original file line number Diff line number Diff line change
@@ -1,43 +1,47 @@
import os
import unifiedbooster as ub
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import accuracy_score

print(f"\n ----- Running: {os.path.basename(__file__)}... ----- \n")

# Load dataset
iris = load_iris()
X, y = iris.data, iris.target

# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.2, random_state=42
)

# Initialize the unified clf (example with XGBoost)
clf1 = ub.GBDTClassifier(model_type='xgboost')
#clf2 = ub.GBDTClassifier(model_type='catboost')
clf3 = ub.GBDTClassifier(model_type='lightgbm')
clf4 = ub.GBDTClassifier(model_type='gradientboosting',
colsample=0.9)
clf1 = ub.GBDTClassifier(model_type="xgboost")
# clf2 = ub.GBDTClassifier(model_type='catboost')
clf3 = ub.GBDTClassifier(model_type="lightgbm")
clf4 = ub.GBDTClassifier(model_type="gradientboosting", colsample=0.9)

# Fit the model
clf1.fit(X_train, y_train)
#clf2.fit(X_train, y_train)
# clf2.fit(X_train, y_train)
clf3.fit(X_train, y_train)
clf4.fit(X_train, y_train)

# Predict on the test set
y_pred1 = clf1.predict(X_test)
#y_pred2 = clf2.predict(X_test)
# y_pred2 = clf2.predict(X_test)
y_pred3 = clf3.predict(X_test)
y_pred4 = clf4.predict(X_test)

# Evaluate the model
accuracy1 = accuracy_score(y_test, y_pred1)
#accuracy2 = accuracy_score(y_test, y_pred2)
# accuracy2 = accuracy_score(y_test, y_pred2)
accuracy3 = accuracy_score(y_test, y_pred3)
accuracy4 = accuracy_score(y_test, y_pred4)
print(f"Classification Accuracy xgboost: {accuracy1:.2f}")
#print(f"Classification Accuracy catboost: {accuracy2:.2f}")
# print(f"Classification Accuracy catboost: {accuracy2:.2f}")
print(f"Classification Accuracy lightgbm: {accuracy3:.2f}")
print(f"Classification Accuracy gradientboosting: {accuracy4:.2f}")
print(f"CV xgboost: {cross_val_score(clf1, X_train, y_train)}")
print(f"CV lightgbm: {cross_val_score(clf3, X_train, y_train)}")
print(f"CV gradientboosting: {cross_val_score(clf4, X_train, y_train)}")
print(f"CV gradientboosting: {cross_val_score(clf4, X_train, y_train)}")
26 changes: 15 additions & 11 deletions examples/regression.py
Original file line number Diff line number Diff line change
@@ -1,43 +1,47 @@
import os
import unifiedbooster as ub
from sklearn.datasets import fetch_california_housing
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.metrics import mean_squared_error

print(f"\n ----- Running: {os.path.basename(__file__)}... ----- \n")

# Load dataset
housing = fetch_california_housing()
X, y = housing.data, housing.target

# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
X_train, X_test, y_train, y_test = train_test_split(
X, y, test_size=0.2, random_state=42
)

# Initialize the unified regr (example with XGBoost)
regr1 = ub.GBDTRegressor(model_type='xgboost')
#regr2 = ub.GBDTRegressor(model_type='catboost')
regr3 = ub.GBDTRegressor(model_type='lightgbm')
regr4 = ub.GBDTRegressor(model_type='gradientboosting',
colsample=0.9)
regr1 = ub.GBDTRegressor(model_type="xgboost")
# regr2 = ub.GBDTRegressor(model_type='catboost')
regr3 = ub.GBDTRegressor(model_type="lightgbm")
regr4 = ub.GBDTRegressor(model_type="gradientboosting", colsample=0.9)

# Fit the model
regr1.fit(X_train, y_train)
#regr2.fit(X_train, y_train)
# regr2.fit(X_train, y_train)
regr3.fit(X_train, y_train)
regr4.fit(X_train, y_train)

# Predict on the test set
y_pred1 = regr1.predict(X_test)
#y_pred2 = regr2.predict(X_test)
# y_pred2 = regr2.predict(X_test)
y_pred3 = regr3.predict(X_test)
y_pred4 = regr4.predict(X_test)

# Evaluate the model
mse1 = mean_squared_error(y_test, y_pred1)
#mse2 = mean_squared_error(y_test, y_pred2)
# mse2 = mean_squared_error(y_test, y_pred2)
mse3 = mean_squared_error(y_test, y_pred3)
mse4 = mean_squared_error(y_test, y_pred4)
print(f"Regression Mean Squared Error xgboost: {mse1:.2f}")
#print(f"Regression Mean Squared Error catboost: {mse2:.2f}")
# print(f"Regression Mean Squared Error catboost: {mse2:.2f}")
print(f"Regression Mean Squared Error lightgbm: {mse3:.2f}")
print(f"Regression Mean Squared Error gradientboosting: {mse4:.2f}")
print(f"CV xgboost: {cross_val_score(regr1, X_train, y_train)}")
print(f"CV lightgbm: {cross_val_score(regr3, X_train, y_train)}")
print(f"CV gradientboosting: {cross_val_score(regr4, X_train, y_train)}")
print(f"CV gradientboosting: {cross_val_score(regr4, X_train, y_train)}")
3 changes: 1 addition & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,15 +2,14 @@

"""The setup script."""

import platform
import subprocess
from setuptools import setup, find_packages
from codecs import open
from os import path

subprocess.check_call(['pip', 'install', 'Cython'])

__version__ = "0.3.0"
__version__ = "0.4.0"

here = path.abspath(path.dirname(__file__))

Expand Down
4 changes: 3 additions & 1 deletion unifiedbooster/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from .gbdt import GBDT
from .gbdt_classification import GBDTClassifier
from .gbdt_regression import GBDTRegressor
from .gpopt import cross_val_optim, lazy_cross_val_optim

__all__ = ["GBDT", "GBDTClassifier", "GBDTRegressor"]
__all__ = ["GBDT", "GBDTClassifier", "GBDTRegressor",
"cross_val_optim", "lazy_cross_val_optim"]
6 changes: 3 additions & 3 deletions unifiedbooster/gbdt.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,8 @@ class GBDT(BaseEstimator):
learning_rate: float
shrinkage rate; used for reducing the gradient step
max_depth: int
max_depth: int
maximum tree depth
rowsample: float
Expand Down Expand Up @@ -90,7 +90,7 @@ def __init__(
"depth": self.max_depth,
"verbose": self.verbose,
"random_seed": self.seed,
"bootstrap_type": "Bernoulli",
"bootstrap_type": "MVS",
**kwargs,
}
elif self.model_type == "gradientboosting":
Expand Down
17 changes: 11 additions & 6 deletions unifiedbooster/gbdt_classification.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
from .gbdt import GBDT
from sklearn.base import ClassifierMixin
from xgboost import XGBClassifier

try:
from xgboost import XGBClassifier
except:
pass
try:
from catboost import CatBoostClassifier
except:
print("catboost package can't be built")
from lightgbm import LGBMClassifier
pass
try:
from lightgbm import LGBMClassifier
except:
pass
from sklearn.ensemble import GradientBoostingClassifier


Expand All @@ -24,8 +29,8 @@ class GBDTClassifier(GBDT, ClassifierMixin):
learning_rate: float
shrinkage rate; used for reducing the gradient step
max_depth: int
max_depth: int
maximum tree depth
rowsample: float
Expand Down
17 changes: 11 additions & 6 deletions unifiedbooster/gbdt_regression.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
from .gbdt import GBDT
from sklearn.base import RegressorMixin
from xgboost import XGBRegressor

try:
from xgboost import XGBRegressor
except:
pass
try:
from catboost import CatBoostRegressor
except:
print("catboost package can't be built")
from lightgbm import LGBMRegressor
pass
try:
from lightgbm import LGBMRegressor
except:
pass
from sklearn.ensemble import GradientBoostingRegressor


Expand All @@ -24,8 +29,8 @@ class GBDTRegressor(GBDT, RegressorMixin):
learning_rate: float
shrinkage rate; used for reducing the gradient step
max_depth: int
max_depth: int
maximum tree depth
rowsample: float
Expand Down

0 comments on commit b154106

Please sign in to comment.