refactor --> bump v0.2.0

thierrymoudiki · Aug 2, 2024 · 532d1ef · 532d1ef
1 parent c1251f9
commit 532d1ef
Show file tree

Hide file tree

Showing 6 changed files with 261 additions and 123 deletions.
diff --git a/Makefile b/Makefile
@@ -53,6 +53,24 @@ coverage: ## check code coverage quickly with the default Python
 	coverage html
 	$(BROWSER) htmlcov/index.html
 
+docs: install ## format the code and generate the pdoc docs in unifiedbooster-docs/
+	pip install black autopep8 pdoc
+	black unifiedbooster/* --line-length=80
+	find unifiedbooster/ -name "*.py" -exec autopep8 --max-line-length=80 --in-place {} +
+	pdoc -t docs unifiedbooster/* --output-dir unifiedbooster-docs
+	find . -name '__pycache__' -exec rm -fr {} +
+
+servedocs: install ## format the code, then serve the docs locally with pdoc
+	pip install black autopep8 pdoc
+	black unifiedbooster/* --line-length=80
+	find unifiedbooster/ -name "*.py" -exec autopep8 --max-line-length=80 --in-place {} +
+	pdoc -t docs unifiedbooster/*
+	find . -name '__pycache__' -exec rm -fr {} +
+
+build-site: docs ## copy the generated docs from unifiedbooster-docs/ into the website checkout
+	cp -rf unifiedbooster-docs/* ../../Pro_Website/Techtonique.github.io/unifiedbooster
+	find . -name '__pycache__' -exec rm -fr {} +
+
 release: dist ## package and upload a release
 	pip install twine --ignore-installed
 	python3 -m twine upload --repository pypi dist/* --verbose

diff --git a/requirements.txt b/requirements.txt
@@ -1,5 +1,7 @@
 Cython
+numpy
 scikit-learn
 xgboost
 lightgbm
-catboost
+catboost
+GPopt
diff --git a/setup.py b/setup.py
@@ -10,7 +10,7 @@
 
 subprocess.check_call(['pip', 'install', 'Cython'])
 
-__version__ = "0.1.3"
+__version__ = "0.2.0"
 
 here = path.abspath(path.dirname(__file__))
 

diff --git a/unifiedbooster/gbdt.py b/unifiedbooster/gbdt.py
@@ -0,0 +1,68 @@
+import numpy as np 
+from sklearn.base import BaseEstimator
+
+
+class GBDT(BaseEstimator):
+    """Shared base of the unified gradient-boosting estimators.
+
+    Maps the common hyperparameters onto the keyword names of the backend
+    selected by `model_type` ('xgboost', 'lightgbm' or 'catboost') and
+    stores the result in `self.params`. Subclasses set `self.type_fit` and
+    build `self.model` from `self.params` (see GBDTClassifier).
+
+    Parameters: model_type (str), n_estimators (int), learning_rate (float),
+    max_depth (int), rowsample (float, row subsampling ratio), colsample
+    (float, per-node feature ratio), verbose (int), seed (int). Any extra
+    `**kwargs` are merged last and override the mapped settings.
+    """
+
+    def __init__(
+        self,
+        model_type="xgboost",
+        n_estimators=100,
+        learning_rate=0.1,
+        max_depth=3,
+        rowsample=1.0,
+        colsample=1.0,
+        verbose=0,
+        seed=123,
+        **kwargs,
+    ):
+        self.model_type = model_type
+        self.n_estimators = n_estimators
+        self.learning_rate = learning_rate
+        self.max_depth = max_depth
+        self.rowsample = rowsample
+        self.colsample = colsample
+        self.verbose = verbose
+        self.seed = seed
+
+        # Backend keyword names, ordered like `values` below.
+        names = {
+            "xgboost": ("n_estimators", "learning_rate", "subsample",
+                        "colsample_bynode", "max_depth", "verbosity", "seed"),
+            "lightgbm": ("n_estimators", "learning_rate", "subsample",
+                         "feature_fraction_bynode", "max_depth", "verbose",
+                         "seed"),
+            "catboost": ("iterations", "learning_rate", "subsample", "rsm",
+                         "depth", "verbose", "random_seed"),
+        }.get(model_type)
+        if names is not None:  # unknown backends leave `params` unset
+            # lightgbm only: verbose=0 is sent as -1 (presumably to silence
+            # its warnings -- confirm against the LightGBM docs)
+            quiet = model_type == "lightgbm" and verbose == 0
+            values = (n_estimators, learning_rate, rowsample, colsample,
+                      max_depth, verbose - 1 if quiet else verbose, seed)
+            self.params = {**dict(zip(names, values)), **kwargs}
+
+    def fit(self, X, y, **kwargs):
+        """Fit the backend model on (X, y) and return what its `fit` returns."""
+        # default None: a subclass that never set `type_fit` is not a classifier
+        if getattr(self, "type_fit", None) == "classification":
+            self.classes_ = np.unique(y)  # for compatibility with sklearn
+            self.n_classes_ = len(self.classes_)  # for compatibility with sklearn
+        return self.model.fit(X, y, **kwargs)
+
+    def predict(self, X):
+        """Return the backend model's predictions for X."""
+        return self.model.predict(X)
diff --git a/unifiedbooster/gbdt_classification.py b/unifiedbooster/gbdt_classification.py
@@ -1,67 +1,98 @@
-from sklearn.base import BaseEstimator, ClassifierMixin
+from .gbdt import GBDT
+from sklearn.base import ClassifierMixin
 from xgboost import XGBClassifier
 from catboost import CatBoostClassifier
 from lightgbm import LGBMClassifier
 
 
-class GBDTClassifier(BaseEstimator, ClassifierMixin):
-    def __init__(self, model_type='xgboost', 
+class GBDTClassifier(GBDT, ClassifierMixin):
+    """GBDT Classification model
+
+    Attributes:
+
+        n_estimators: int
+            maximum number of trees that can be built 
+
+        learning_rate: float
+            shrinkage rate; used for reducing the gradient step
+
+        rowsample: float
+            subsample ratio of the training instances
+
+        colsample: float
+            fraction of features to use at each node split
+
+        verbose: int
+            controls verbosity (default=0)
+        
+        seed: int 
+            reproducibility seed 
+
+    Examples:
+
+    ```python
+    import unifiedbooster as ub
+    from sklearn.datasets import load_iris
+    from sklearn.model_selection import train_test_split
+    from sklearn.metrics import accuracy_score
+
+    # Load dataset
+    iris = load_iris()
+    X, y = iris.data, iris.target
+
+    # Split dataset into training and testing sets
+    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+    # Initialize the unified classifiers (XGBoost and LightGBM; CatBoost is commented out)
+    regressor1 = ub.GBDTClassifier(model_type='xgboost')
+    #regressor2 = ub.GBDTClassifier(model_type='catboost')
+    regressor3 = ub.GBDTClassifier(model_type='lightgbm')
+
+    # Fit the model
+    regressor1.fit(X_train, y_train)
+    #regressor2.fit(X_train, y_train)
+    regressor3.fit(X_train, y_train)
+
+    # Predict on the test set
+    y_pred1 = regressor1.predict(X_test)
+    #y_pred2 = regressor2.predict(X_test)
+    y_pred3 = regressor3.predict(X_test)
+
+    # Evaluate the model
+    accuracy1 = accuracy_score(y_test, y_pred1)
+    #accuracy2 = accuracy_score(y_test, y_pred2)
+    accuracy3 = accuracy_score(y_test, y_pred3)
+    print(f"Classification Accuracy xgboost: {accuracy1:.2f}")
+    #print(f"Classification Accuracy catboost: {accuracy2:.2f}")
+    print(f"Classification Accuracy lightgbm: {accuracy3:.2f}")
+    ```
+    """
+
+    def __init__(self, 
+                 model_type='xgboost', 
                  n_estimators=100, 
                  learning_rate=0.1, 
                  max_depth=3, 
-                 subsample=1.0,  
-                 verbosity=0,
+                 rowsample=1.0,  
+                 colsample=1.0,
+                 verbose=0,
+                 seed=123,
                  **kwargs):
-        self.model_type = model_type
-        self.n_estimators = n_estimators
-        self.learning_rate = learning_rate
-        self.max_depth = max_depth
-        self.subsample = subsample
-        self.verbosity = verbosity
-        # xgboost -----
-        # n_estimators        
-        # learning_rate
-        # subsample
-        # max_depth
-        # lightgbm -----
-        # n_estimators
-        # learning_rate
-        # bagging_fraction
-        # max_depth        
-        # catboost -----
-        # iterations
-        # learning_rate        
-        # rsm 
-        # depth
-        if self.model_type == "xgboost": 
-            self.params = {
-                'n_estimators': self.n_estimators,
-                'learning_rate': self.learning_rate,
-                'subsample': self.subsample,
-                'max_depth': self.max_depth,
-                'verbosity': self.verbosity,
-                **kwargs
-            }
-        elif self.model_type == "lightgbm":
-             verbose = self.verbosity - 1 if self.verbosity==0 else self.verbosity
-             self.params = {
-                'n_estimators': self.n_estimators,
-                'learning_rate': self.learning_rate,
-                'bagging_fraction': self.subsample,
-                'max_depth': self.max_depth,
-                'verbose': verbose,
-                **kwargs
-            }
-        elif self.model_type == "catboost":
-             self.params = {
-                'iterations': self.n_estimators,
-                'learning_rate': self.learning_rate,
-                'rsm': self.subsample,
-                'depth': self.max_depth,
-                'verbose': self.verbosity,
-                **kwargs
-            }           
 
+        self.type_fit = "classification"
+
+        super().__init__(
+            model_type=model_type, 
+            n_estimators=n_estimators, 
+            learning_rate=learning_rate, 
+            max_depth=max_depth, 
+            rowsample=rowsample,
+            colsample=colsample,    
+            verbose=verbose,    
+            seed=seed,          
+            **kwargs
+        )
+
         if model_type == 'xgboost':
             self.model = XGBClassifier(**self.params)
         elif model_type == 'catboost':            
@@ -70,12 +101,6 @@ def __init__(self, model_type='xgboost',
             self.model = LGBMClassifier(**self.params)
         else:
             raise ValueError(f"Unknown model_type: {model_type}")
-
-    def fit(self, X, y, **kwargs):
-        return self.model.fit(X, y, **kwargs)
-
-    def predict(self, X):
-        return self.model.predict(X)
-
+
     def predict_proba(self, X):
         return self.model.predict_proba(X)