conformal GBDT v0.7.0

thierrymoudiki · Sep 2, 2024 · 8ed3574 · 8ed3574
1 parent be8e8d2
commit 8ed3574
Show file tree

Hide file tree

Showing 5 changed files with 104 additions and 38 deletions.
diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml
@@ -30,8 +30,8 @@ jobs:
       - name: Run examples 
         run: pip install .&&find examples -maxdepth 2 -name "*.py" -exec  python3 {} \;
 
-      #- name: Publish to PyPI                
-      #  uses: pypa/gh-action-pypi-publish@release/v1
-      #  with:
-      #    password: ${{ secrets.PYPI_GLOBAL_UB }}
-      #    repository-url: https://upload.pypi.org/legacy/
+      - name: Publish to PyPI                
+        uses: pypa/gh-action-pypi-publish@release/v1
+        with:
+          password: ${{ secrets.PYPI_GLOBAL_UB }}
+          repository-url: https://upload.pypi.org/legacy/
diff --git a/unifiedbooster/gbdt.py b/unifiedbooster/gbdt.py
@@ -35,7 +35,6 @@ class GBDT(BaseEstimator):
         **kwargs: dict
             additional parameters to be passed to the class
     """
-
     def __init__(
         self,
         model_type="xgboost",
@@ -44,6 +43,8 @@ def __init__(
         max_depth=3,
         rowsample=1.0,
         colsample=1.0,
+        level=None,
+        pi_method=None,
         verbose=0,
         seed=123,
         **kwargs
@@ -55,6 +56,8 @@ def __init__(
         self.max_depth = max_depth
         self.rowsample = rowsample
         self.colsample = colsample
+        self.level = level
+        self.pi_method = pi_method
         self.verbose = verbose
         self.seed = seed
 
@@ -126,7 +129,6 @@ def fit(self, X, y, **kwargs):
 
             self: object
         """
-
         if getattr(self, "type_fit") == "classification":
             self.classes_ = np.unique(y)  # for compatibility with sklearn
             self.n_classes_ = len(
@@ -152,5 +154,7 @@ def predict(self, X):
 
             model predictions: {array-like}
         """
-
-        return getattr(self, "model").predict(X)
+        if self.level is not None and self.type_fit == "regression":
+            return getattr(self, "model").predict(X, return_pi=True)
+        else:
+            return getattr(self, "model").predict(X)
diff --git a/unifiedbooster/gbdt_classification.py b/unifiedbooster/gbdt_classification.py
@@ -1,6 +1,6 @@
 from .gbdt import GBDT
 from sklearn.base import ClassifierMixin
-from .nonconformist import ClassifierAdapter, IcpClassifier, TcpClassifier, MarginErrFunc   
+from .predictionset import PredictionSet
 
 try:
     from xgboost import XGBClassifier
@@ -40,6 +40,12 @@ class GBDTClassifier(GBDT, ClassifierMixin):
 
         colsample: float
             percentage of features to use at each node split
+        
+        level: float
+            confidence level for prediction sets
+        
+        pi_method: str
+            method for constructing the prediction sets: 'icp' (inductive conformal), 'tcp' (transductive conformal)
 
         verbose: int
             controls verbosity (default=0)
@@ -89,7 +95,6 @@ class GBDTClassifier(GBDT, ClassifierMixin):
         print(f"Classification Accuracy lightgbm: {accuracy3:.2f}")
         ```
     """
-
     def __init__(
         self,
         model_type="xgboost",
@@ -98,6 +103,8 @@ def __init__(
         max_depth=3,
         rowsample=1.0,
         colsample=1.0,
+        level=None,
+        pi_method="icp",
         verbose=0,
         seed=123,
         **kwargs,
@@ -112,21 +119,46 @@ def __init__(
             max_depth=max_depth,
             rowsample=rowsample,
             colsample=colsample,
+            level=level,
+            pi_method=pi_method,
             verbose=verbose,
             seed=seed,
             **kwargs,
         )
 
-        if model_type == "xgboost":
-            self.model = XGBClassifier(**self.params)
-        elif model_type == "catboost":
-            self.model = CatBoostClassifier(**self.params)
-        elif model_type == "lightgbm":
-            self.model = LGBMClassifier(**self.params)
-        elif model_type == "gradientboosting":
-            self.model = GradientBoostingClassifier(**self.params)
+        if self.level is not None:
+
+            if model_type == "xgboost":
+                self.model = PredictionSet(XGBClassifier(**self.params), 
+                                            level=self.level, 
+                                            method=self.pi_method)
+            elif model_type == "catboost":
+                self.model = PredictionSet(CatBoostClassifier(**self.params), 
+                                            level=self.level, 
+                                            method=self.pi_method)
+            elif model_type == "lightgbm":
+                self.model = PredictionSet(LGBMClassifier(**self.params), 
+                                            level=self.level, 
+                                            method=self.pi_method)
+            elif model_type == "gradientboosting":
+                self.model = PredictionSet(GradientBoostingClassifier(**self.params), 
+                                            level=self.level, 
+                                            method=self.pi_method)
+            else:
+                raise ValueError(f"Unknown model_type: {model_type}")
+
         else:
-            raise ValueError(f"Unknown model_type: {model_type}")
+
+            if model_type == "xgboost":
+                self.model = XGBClassifier(**self.params)
+            elif model_type == "catboost":
+                self.model = CatBoostClassifier(**self.params)
+            elif model_type == "lightgbm":
+                self.model = LGBMClassifier(**self.params)
+            elif model_type == "gradientboosting":
+                self.model = GradientBoostingClassifier(**self.params)
+            else:
+                raise ValueError(f"Unknown model_type: {model_type}")
 
     def predict_proba(self, X):
         """Predict probabilities for test data X.
@@ -144,5 +176,4 @@ def predict_proba(self, X):
 
             probability estimates for test data: {array-like}
         """
-
         return self.model.predict_proba(X)
diff --git a/unifiedbooster/gbdt_regression.py b/unifiedbooster/gbdt_regression.py
@@ -1,7 +1,6 @@
 from .gbdt import GBDT
 from sklearn.base import RegressorMixin
 from .predictioninterval import PredictionInterval
-from .nonconformist import RegressorAdapter, IcpRegressor, RegressorNc, RegressorNormalizer, QuantileRegErrFunc
 
 try:
     from xgboost import XGBRegressor
@@ -41,6 +40,12 @@ class GBDTRegressor(GBDT, RegressorMixin):
 
         colsample: float
             percentage of features to use at each node split
+        
+        level: float
+            confidence level for prediction intervals
+
+        pi_method: str
+            method for constructing the prediction intervals: 'splitconformal', 'localconformal'
 
         verbose: int
             controls verbosity (default=0)
@@ -90,7 +95,6 @@ class GBDTRegressor(GBDT, RegressorMixin):
         print(f"Regression Mean Squared Error lightgbm: {mse3:.2f}")
         ```
     """
-
     def __init__(
         self,
         model_type="xgboost",
@@ -99,12 +103,14 @@ def __init__(
         max_depth=3,
         rowsample=1.0,
         colsample=1.0,
+        level=None,
+        pi_method="splitconformal",
         verbose=0,
         seed=123,
         **kwargs,
     ):
 
-        self.type_fit = "regression"
+        self.type_fit = "regression"        
 
         super().__init__(
             model_type=model_type,
@@ -113,18 +119,43 @@ def __init__(
             max_depth=max_depth,
             rowsample=rowsample,
             colsample=colsample,
+            level=level,
+            pi_method=pi_method,
             verbose=verbose,
             seed=seed,
             **kwargs,
         )
 
-        if model_type == "xgboost":
-            self.model = XGBRegressor(**self.params)
-        elif model_type == "catboost":
-            self.model = CatBoostRegressor(**self.params)
-        elif model_type == "lightgbm":
-            self.model = LGBMRegressor(**self.params)
-        elif model_type == "gradientboosting":
-            self.model = GradientBoostingRegressor(**self.params)
-        else:
-            raise ValueError(f"Unknown model_type: {model_type}")
+        if self.level is not None:
+
+            if model_type == "xgboost":
+                self.model = PredictionInterval(XGBRegressor(**self.params), 
+                                                level=self.level, 
+                                                method=self.pi_method)
+            elif model_type == "catboost":
+                self.model = PredictionInterval(CatBoostRegressor(**self.params), 
+                                                level=self.level, 
+                                                method=self.pi_method)
+            elif model_type == "lightgbm":
+                self.model = PredictionInterval(LGBMRegressor(**self.params), 
+                                                level=self.level, 
+                                                method=self.pi_method)
+            elif model_type == "gradientboosting":
+                self.model = PredictionInterval(GradientBoostingRegressor(**self.params), 
+                                                level=self.level, 
+                                                method=self.pi_method)
+            else:
+                raise ValueError(f"Unknown model_type: {model_type}")
+
+        else:     
+
+            if model_type == "xgboost":
+                self.model = XGBRegressor(**self.params)
+            elif model_type == "catboost":
+                self.model = CatBoostRegressor(**self.params)
+            elif model_type == "lightgbm":
+                self.model = LGBMRegressor(**self.params)
+            elif model_type == "gradientboosting":
+                self.model = GradientBoostingRegressor(**self.params)
+            else:
+                raise ValueError(f"Unknown model_type: {model_type}")
diff --git a/unifiedbooster/nonconformist/base.py b/unifiedbooster/nonconformist/base.py
@@ -9,7 +9,7 @@
 import abc
 import numpy as np
 
-from sklearn.base import BaseEstimator
+from sklearn.base import BaseEstimator, RegressorMixin, ClassifierMixin
 
 
 class RegressorMixin(object):
@@ -102,15 +102,15 @@ def _underlying_predict(self, x):
         pass
 
 
-class ClassifierAdapter(BaseModelAdapter):
+class ClassifierAdapter(BaseModelAdapter, ClassifierMixin):
     def __init__(self, model, fit_params=None):
         super(ClassifierAdapter, self).__init__(model, fit_params)
 
     def _underlying_predict(self, x):
         return self.model.predict_proba(x)
+
 
-
-class RegressorAdapter(BaseModelAdapter):
+class RegressorAdapter(BaseModelAdapter, RegressorMixin):
     def __init__(self, model, fit_params=None):
         super(RegressorAdapter, self).__init__(model, fit_params)