Skip to content

Commit

Permalink
refactor --> bump v0.2.0
Browse files Browse the repository at this point in the history
  • Loading branch information
thierrymoudiki committed Aug 2, 2024
1 parent c1251f9 commit 532d1ef
Show file tree
Hide file tree
Showing 6 changed files with 261 additions and 123 deletions.
18 changes: 18 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,24 @@ coverage: ## check code coverage quickly with the default Python
coverage html
$(BROWSER) htmlcov/index.html

# Format the package sources, then render the API documentation with pdoc.
# The output directory is unifiedbooster-docs/, which is the directory the
# `build-site` target copies from. autopep8 is installed here because the
# recipe invokes it.
docs: install ## generate docs
	pip install black pdoc autopep8
	black unifiedbooster/* --line-length=80
	find unifiedbooster/ -name "*.py" -exec autopep8 --max-line-length=80 --in-place {} +
	pdoc -t docs unifiedbooster/* --output-dir unifiedbooster-docs
	find . -name '__pycache__' -exec rm -fr {} +

# Format the package sources, then run pdoc without --output-dir so it serves
# the documentation instead of writing it to disk. autopep8 is installed here
# because the recipe invokes it.
servedocs: install ## compile the docs watching for change
	pip install black pdoc autopep8
	black unifiedbooster/* --line-length=80
	find unifiedbooster/ -name "*.py" -exec autopep8 --max-line-length=80 --in-place {} +
	pdoc -t docs unifiedbooster/*
	find . -name '__pycache__' -exec rm -fr {} +

# Publish the generated documentation: runs the `docs` prerequisite first,
# copies everything under unifiedbooster-docs/ into the website checkout at
# ../../Pro_Website/Techtonique.github.io/unifiedbooster, then removes any
# __pycache__ directories below the current directory.
# NOTE(review): assumes `docs` has populated unifiedbooster-docs/ -- verify
# that its --output-dir matches this source directory.
build-site: docs ## export mkdocs website to a folder
cp -rf unifiedbooster-docs/* ../../Pro_Website/Techtonique.github.io/unifiedbooster
find . -name '__pycache__' -exec rm -fr {} +

release: dist ## package and upload a release
pip install twine --ignore-installed
python3 -m twine upload --repository pypi dist/* --verbose
Expand Down
4 changes: 3 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
Cython
numpy
scikit-learn
xgboost
lightgbm
catboost
catboost
GPopt
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

subprocess.check_call(['pip', 'install', 'Cython'])

__version__ = "0.1.3"
__version__ = "0.2.0"

here = path.abspath(path.dirname(__file__))

Expand Down
68 changes: 68 additions & 0 deletions unifiedbooster/gbdt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
import numpy as np
from sklearn.base import BaseEstimator


class GBDT(BaseEstimator):
    """Common base for the unified gradient-boosting wrappers.

    Stores the backend-agnostic hyperparameters and translates them into the
    keyword names of the selected backend. The translated dictionary is kept
    in ``self.params``. ``fit`` and ``predict`` delegate to ``self.model`` and
    ``fit`` reads ``self.type_fit``; neither attribute is set here, so a
    subclass has to provide them.

    Attributes:
        model_type: str
            backend to target: 'xgboost', 'lightgbm' or 'catboost'
        n_estimators: int
            maximum number of trees that can be built
        learning_rate: float
            shrinkage rate; used for reducing the gradient step
        max_depth: int
            value forwarded as the backend's tree depth parameter
        rowsample: float
            subsample ratio of the training instances
        colsample: float
            percentage of features to use at each node split
        verbose: int
            controls verbosity (default=0)
        seed: int
            reproducibility seed
        **kwargs:
            extra backend-specific parameters; merged last into
            ``self.params``, so they override the translated values
    """

    def __init__(
        self,
        model_type="xgboost",
        n_estimators=100,
        learning_rate=0.1,
        max_depth=3,
        rowsample=1.0,
        colsample=1.0,
        verbose=0,
        seed=123,
        **kwargs
    ):
        self.model_type = model_type
        self.n_estimators = n_estimators
        self.learning_rate = learning_rate
        self.max_depth = max_depth
        self.rowsample = rowsample
        self.colsample = colsample
        self.verbose = verbose
        self.seed = seed

        if self.model_type == "xgboost":
            self.params = {
                "n_estimators": self.n_estimators,
                "learning_rate": self.learning_rate,
                "subsample": self.rowsample,
                "colsample_bynode": self.colsample,
                "max_depth": self.max_depth,
                "verbosity": self.verbose,
                "seed": self.seed,
                **kwargs,
            }
        elif self.model_type == "lightgbm":
            # A verbosity of 0 is forwarded to lightgbm as -1; any other
            # value is passed through unchanged.
            verbose = (
                (self.verbose - 1) if self.verbose == 0 else self.verbose
            )
            self.params = {
                "n_estimators": self.n_estimators,
                "learning_rate": self.learning_rate,
                "subsample": self.rowsample,
                "feature_fraction_bynode": self.colsample,
                "max_depth": self.max_depth,
                "verbose": verbose,  # keep this way
                "seed": self.seed,
                **kwargs,
            }
        elif self.model_type == "catboost":
            self.params = {
                "iterations": self.n_estimators,
                "learning_rate": self.learning_rate,
                "subsample": self.rowsample,
                "rsm": self.colsample,
                "depth": self.max_depth,
                "verbose": self.verbose,
                "random_seed": self.seed,
                **kwargs,
            }
        else:
            # Fail fast: without this branch ``self.params`` would silently
            # stay undefined for an unsupported backend.
            raise ValueError(f"Unknown model_type: {model_type}")

    def fit(self, X, y, **kwargs):
        """Fit the wrapped backend model on (X, y).

        When ``self.type_fit`` is "classification", ``classes_`` and
        ``n_classes_`` are recorded from ``y`` before fitting.

        Args:
            X: training inputs, passed unchanged to the backend
            y: training targets, passed unchanged to the backend
            **kwargs: extra arguments forwarded to the backend's ``fit``

        Returns:
            whatever the backend model's ``fit`` returns
        """
        if self.type_fit == "classification":
            self.classes_ = np.unique(y)  # for compatibility with sklearn
            self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
        return self.model.fit(X, y, **kwargs)

    def predict(self, X):
        """Return the wrapped backend model's predictions for X."""
        return self.model.predict(X)
147 changes: 86 additions & 61 deletions unifiedbooster/gbdt_classification.py
Original file line number Diff line number Diff line change
@@ -1,67 +1,98 @@
from sklearn.base import BaseEstimator, ClassifierMixin
from .gbdt import GBDT
from sklearn.base import ClassifierMixin
from xgboost import XGBClassifier
from catboost import CatBoostClassifier
from lightgbm import LGBMClassifier


class GBDTClassifier(BaseEstimator, ClassifierMixin):
def __init__(self, model_type='xgboost',
class GBDTClassifier(GBDT, ClassifierMixin):
"""GBDT Classification model
Attributes:
n_estimators: int
maximum number of trees that can be built
learning_rate: float
shrinkage rate; used for reducing the gradient step
rowsample: float
subsample ratio of the training instances
colsample: float
percentage of features to use at each node split
verbose: int
controls verbosity (default=0)
seed: int
reproducibility seed
Examples:
```python
import unifiedbooster as ub
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# Load dataset
iris = load_iris()
X, y = iris.data, iris.target
# Split dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Initialize the unified classifier (example with XGBoost)
regressor1 = ub.GBDTClassifier(model_type='xgboost')
#regressor2 = ub.GBDTClassifier(model_type='catboost')
regressor3 = ub.GBDTClassifier(model_type='lightgbm')
# Fit the model
regressor1.fit(X_train, y_train)
#regressor2.fit(X_train, y_train)
regressor3.fit(X_train, y_train)
# Predict on the test set
y_pred1 = regressor1.predict(X_test)
#y_pred2 = regressor2.predict(X_test)
y_pred3 = regressor3.predict(X_test)
# Evaluate the model
accuracy1 = accuracy_score(y_test, y_pred1)
#accuracy2 = accuracy_score(y_test, y_pred2)
accuracy3 = accuracy_score(y_test, y_pred3)
print(f"Classification Accuracy xgboost: {accuracy1:.2f}")
#print(f"Classification Accuracy catboost: {accuracy2:.2f}")
print(f"Classification Accuracy lightgbm: {accuracy3:.2f}")
```
"""

def __init__(self,
model_type='xgboost',
n_estimators=100,
learning_rate=0.1,
max_depth=3,
subsample=1.0,
verbosity=0,
rowsample=1.0,
colsample=1.0,
verbose=0,
seed=123,
**kwargs):
self.model_type = model_type
self.n_estimators = n_estimators
self.learning_rate = learning_rate
self.max_depth = max_depth
self.subsample = subsample
self.verbosity = verbosity
# xgboost -----
# n_estimators
# learning_rate
# subsample
# max_depth
# lightgbm -----
# n_estimators
# learning_rate
# bagging_fraction
# max_depth
# catboost -----
# iterations
# learning_rate
# rsm
# depth
if self.model_type == "xgboost":
self.params = {
'n_estimators': self.n_estimators,
'learning_rate': self.learning_rate,
'subsample': self.subsample,
'max_depth': self.max_depth,
'verbosity': self.verbosity,
**kwargs
}
elif self.model_type == "lightgbm":
verbose = self.verbosity - 1 if self.verbosity==0 else self.verbosity
self.params = {
'n_estimators': self.n_estimators,
'learning_rate': self.learning_rate,
'bagging_fraction': self.subsample,
'max_depth': self.max_depth,
'verbose': verbose,
**kwargs
}
elif self.model_type == "catboost":
self.params = {
'iterations': self.n_estimators,
'learning_rate': self.learning_rate,
'rsm': self.subsample,
'depth': self.max_depth,
'verbose': self.verbosity,
**kwargs
}

self.type_fit = "classification"

super().__init__(
model_type=model_type,
n_estimators=n_estimators,
learning_rate=learning_rate,
max_depth=max_depth,
rowsample=rowsample,
colsample=colsample,
verbose=verbose,
seed=seed,
**kwargs
)

if model_type == 'xgboost':
self.model = XGBClassifier(**self.params)
elif model_type == 'catboost':
Expand All @@ -70,12 +101,6 @@ def __init__(self, model_type='xgboost',
self.model = LGBMClassifier(**self.params)
else:
raise ValueError(f"Unknown model_type: {model_type}")

def fit(self, X, y, **kwargs):
return self.model.fit(X, y, **kwargs)

def predict(self, X):
return self.model.predict(X)


def predict_proba(self, X):
return self.model.predict_proba(X)
Loading

0 comments on commit 532d1ef

Please sign in to comment.