From 8ed3574dd2c849f92973ae7dfe11ad745ffa0ac0 Mon Sep 17 00:00:00 2001 From: Thierry Moudiki Date: Mon, 2 Sep 2024 10:51:21 +0200 Subject: [PATCH] conformal GBDT v0.7.0 --- .github/workflows/python-publish.yml | 10 ++--- unifiedbooster/gbdt.py | 12 ++++-- unifiedbooster/gbdt_classification.py | 55 ++++++++++++++++++++------ unifiedbooster/gbdt_regression.py | 57 +++++++++++++++++++++------ unifiedbooster/nonconformist/base.py | 8 ++-- 5 files changed, 104 insertions(+), 38 deletions(-) diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index 5f212a4..345ca77 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -30,8 +30,8 @@ jobs: - name: Run examples run: pip install .&&find examples -maxdepth 2 -name "*.py" -exec python3 {} \; - #- name: Publish to PyPI - # uses: pypa/gh-action-pypi-publish@release/v1 - # with: - # password: ${{ secrets.PYPI_GLOBAL_UB }} - # repository-url: https://upload.pypi.org/legacy/ + - name: Publish to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + with: + password: ${{ secrets.PYPI_GLOBAL_UB }} + repository-url: https://upload.pypi.org/legacy/ diff --git a/unifiedbooster/gbdt.py b/unifiedbooster/gbdt.py index c4e774e..30cebae 100644 --- a/unifiedbooster/gbdt.py +++ b/unifiedbooster/gbdt.py @@ -35,7 +35,6 @@ class GBDT(BaseEstimator): **kwargs: dict additional parameters to be passed to the class """ - def __init__( self, model_type="xgboost", @@ -44,6 +43,8 @@ def __init__( max_depth=3, rowsample=1.0, colsample=1.0, + level=None, + pi_method=None, verbose=0, seed=123, **kwargs @@ -55,6 +56,8 @@ def __init__( self.max_depth = max_depth self.rowsample = rowsample self.colsample = colsample + self.level = level + self.pi_method = pi_method self.verbose = verbose self.seed = seed @@ -126,7 +129,6 @@ def fit(self, X, y, **kwargs): self: object """ - if getattr(self, "type_fit") == "classification": self.classes_ = np.unique(y) # for compatibility with 
sklearn self.n_classes_ = len( @@ -152,5 +154,7 @@ def predict(self, X): model predictions: {array-like} """ - - return getattr(self, "model").predict(X) + if self.level is not None and self.type_fit == "regression": + return getattr(self, "model").predict(X, return_pi=True) + else: + return getattr(self, "model").predict(X) diff --git a/unifiedbooster/gbdt_classification.py b/unifiedbooster/gbdt_classification.py index d358c5e..76d6595 100644 --- a/unifiedbooster/gbdt_classification.py +++ b/unifiedbooster/gbdt_classification.py @@ -1,6 +1,6 @@ from .gbdt import GBDT from sklearn.base import ClassifierMixin -from .nonconformist import ClassifierAdapter, IcpClassifier, TcpClassifier, MarginErrFunc +from .predictionset import PredictionSet try: from xgboost import XGBClassifier @@ -40,6 +40,12 @@ class GBDTClassifier(GBDT, ClassifierMixin): colsample: float percentage of features to use at each node split + + level: float + confidence level for prediction sets + + pi_method: str + method for constructing the prediction sets: 'icp' (inductive conformal), 'tcp' (transductive conformal) verbose: int controls verbosity (default=0) @@ -89,7 +95,6 @@ class GBDTClassifier(GBDT, ClassifierMixin): print(f"Classification Accuracy lightgbm: {accuracy3:.2f}") ``` """ - def __init__( self, model_type="xgboost", @@ -98,6 +103,8 @@ def __init__( max_depth=3, rowsample=1.0, colsample=1.0, + level=None, + pi_method="icp", verbose=0, seed=123, **kwargs, @@ -112,21 +119,46 @@ def __init__( max_depth=max_depth, rowsample=rowsample, colsample=colsample, + level=level, + pi_method=pi_method, verbose=verbose, seed=seed, **kwargs, ) - if model_type == "xgboost": - self.model = XGBClassifier(**self.params) - elif model_type == "catboost": - self.model = CatBoostClassifier(**self.params) - elif model_type == "lightgbm": - self.model = LGBMClassifier(**self.params) - elif model_type == "gradientboosting": - self.model = GradientBoostingClassifier(**self.params) + if self.level is not 
None: + + if model_type == "xgboost": + self.model = PredictionSet(XGBClassifier(**self.params), + level=self.level, + method=self.pi_method) + elif model_type == "catboost": + self.model = PredictionSet(CatBoostClassifier(**self.params), + level=self.level, + method=self.pi_method) + elif model_type == "lightgbm": + self.model = PredictionSet(LGBMClassifier(**self.params), + level=self.level, + method=self.pi_method) + elif model_type == "gradientboosting": + self.model = PredictionSet(GradientBoostingClassifier(**self.params), + level=self.level, + method=self.pi_method) + else: + raise ValueError(f"Unknown model_type: {model_type}") + else: - raise ValueError(f"Unknown model_type: {model_type}") + + if model_type == "xgboost": + self.model = XGBClassifier(**self.params) + elif model_type == "catboost": + self.model = CatBoostClassifier(**self.params) + elif model_type == "lightgbm": + self.model = LGBMClassifier(**self.params) + elif model_type == "gradientboosting": + self.model = GradientBoostingClassifier(**self.params) + else: + raise ValueError(f"Unknown model_type: {model_type}") def predict_proba(self, X): """Predict probabilities for test data X. 
@@ -144,5 +176,4 @@ def predict_proba(self, X): probability estimates for test data: {array-like} """ - return self.model.predict_proba(X) diff --git a/unifiedbooster/gbdt_regression.py b/unifiedbooster/gbdt_regression.py index df74d53..bf78ec9 100644 --- a/unifiedbooster/gbdt_regression.py +++ b/unifiedbooster/gbdt_regression.py @@ -1,7 +1,6 @@ from .gbdt import GBDT from sklearn.base import RegressorMixin from .predictioninterval import PredictionInterval -from .nonconformist import RegressorAdapter, IcpRegressor, RegressorNc, RegressorNormalizer, QuantileRegErrFunc try: from xgboost import XGBRegressor @@ -41,6 +40,12 @@ class GBDTRegressor(GBDT, RegressorMixin): colsample: float percentage of features to use at each node split + + level: float + confidence level for prediction intervals + + pi_method: str + method for constructing the prediction intervals: 'splitconformal', 'localconformal' verbose: int controls verbosity (default=0) @@ -90,7 +95,6 @@ class GBDTRegressor(GBDT, RegressorMixin): print(f"Regression Mean Squared Error lightgbm: {mse3:.2f}") ``` """ - def __init__( self, model_type="xgboost", @@ -99,12 +103,14 @@ def __init__( max_depth=3, rowsample=1.0, colsample=1.0, + level=None, + pi_method="splitconformal", verbose=0, seed=123, **kwargs, ): - self.type_fit = "regression" + self.type_fit = "regression" super().__init__( model_type=model_type, @@ -113,18 +119,43 @@ def __init__( max_depth=max_depth, rowsample=rowsample, colsample=colsample, + level=level, + pi_method=pi_method, verbose=verbose, seed=seed, **kwargs, ) - if model_type == "xgboost": - self.model = XGBRegressor(**self.params) - elif model_type == "catboost": - self.model = CatBoostRegressor(**self.params) - elif model_type == "lightgbm": - self.model = LGBMRegressor(**self.params) - elif model_type == "gradientboosting": - self.model = GradientBoostingRegressor(**self.params) - else: - raise ValueError(f"Unknown model_type: {model_type}") + if self.level is not None: + + if model_type == 
"xgboost": + self.model = PredictionInterval(XGBRegressor(**self.params), + level=self.level, + method=self.pi_method) + elif model_type == "catboost": + self.model = PredictionInterval(CatBoostRegressor(**self.params), + level=self.level, + method=self.pi_method) + elif model_type == "lightgbm": + self.model = PredictionInterval(LGBMRegressor(**self.params), + level=self.level, + method=self.pi_method) + elif model_type == "gradientboosting": + self.model = PredictionInterval(GradientBoostingRegressor(**self.params), + level=self.level, + method=self.pi_method) + else: + raise ValueError(f"Unknown model_type: {model_type}") + + else: + + if model_type == "xgboost": + self.model = XGBRegressor(**self.params) + elif model_type == "catboost": + self.model = CatBoostRegressor(**self.params) + elif model_type == "lightgbm": + self.model = LGBMRegressor(**self.params) + elif model_type == "gradientboosting": + self.model = GradientBoostingRegressor(**self.params) + else: + raise ValueError(f"Unknown model_type: {model_type}") diff --git a/unifiedbooster/nonconformist/base.py b/unifiedbooster/nonconformist/base.py index baea8e9..87c8775 100644 --- a/unifiedbooster/nonconformist/base.py +++ b/unifiedbooster/nonconformist/base.py @@ -9,7 +9,7 @@ import abc import numpy as np -from sklearn.base import BaseEstimator +from sklearn.base import BaseEstimator, RegressorMixin, ClassifierMixin class RegressorMixin(object): @@ -102,15 +102,15 @@ def _underlying_predict(self, x): pass -class ClassifierAdapter(BaseModelAdapter): +class ClassifierAdapter(BaseModelAdapter, ClassifierMixin): def __init__(self, model, fit_params=None): super(ClassifierAdapter, self).__init__(model, fit_params) def _underlying_predict(self, x): return self.model.predict_proba(x) + - -class RegressorAdapter(BaseModelAdapter): +class RegressorAdapter(BaseModelAdapter, RegressorMixin): def __init__(self, model, fit_params=None): super(RegressorAdapter, self).__init__(model, fit_params)