Skip to content

Commit

Permalink
conformal GBDT v0.7.0
Browse files Browse the repository at this point in the history
  • Loading branch information
thierrymoudiki committed Sep 2, 2024
1 parent be8e8d2 commit 8ed3574
Show file tree
Hide file tree
Showing 5 changed files with 104 additions and 38 deletions.
10 changes: 5 additions & 5 deletions .github/workflows/python-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,8 @@ jobs:
- name: Run examples
run: pip install .&&find examples -maxdepth 2 -name "*.py" -exec python3 {} \;

#- name: Publish to PyPI
# uses: pypa/gh-action-pypi-publish@release/v1
# with:
# password: ${{ secrets.PYPI_GLOBAL_UB }}
# repository-url: https://upload.pypi.org/legacy/
- name: Publish to PyPI
uses: pypa/gh-action-pypi-publish@release/v1
with:
password: ${{ secrets.PYPI_GLOBAL_UB }}
repository-url: https://upload.pypi.org/legacy/
12 changes: 8 additions & 4 deletions unifiedbooster/gbdt.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,6 @@ class GBDT(BaseEstimator):
**kwargs: dict
additional parameters to be passed to the class
"""

def __init__(
self,
model_type="xgboost",
Expand All @@ -44,6 +43,8 @@ def __init__(
max_depth=3,
rowsample=1.0,
colsample=1.0,
level=None,
pi_method=None,
verbose=0,
seed=123,
**kwargs
Expand All @@ -55,6 +56,8 @@ def __init__(
self.max_depth = max_depth
self.rowsample = rowsample
self.colsample = colsample
self.level = level
self.pi_method = pi_method
self.verbose = verbose
self.seed = seed

Expand Down Expand Up @@ -126,7 +129,6 @@ def fit(self, X, y, **kwargs):
self: object
"""

if getattr(self, "type_fit") == "classification":
self.classes_ = np.unique(y) # for compatibility with sklearn
self.n_classes_ = len(
Expand All @@ -152,5 +154,7 @@ def predict(self, X):
model predictions: {array-like}
"""

return getattr(self, "model").predict(X)
if self.level is not None and self.type_fit == "regression":
return getattr(self, "model").predict(X, return_pi=True)
else:
return getattr(self, "model").predict(X)
55 changes: 43 additions & 12 deletions unifiedbooster/gbdt_classification.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from .gbdt import GBDT
from sklearn.base import ClassifierMixin
from .nonconformist import ClassifierAdapter, IcpClassifier, TcpClassifier, MarginErrFunc
from .predictionset import PredictionSet

try:
from xgboost import XGBClassifier
Expand Down Expand Up @@ -40,6 +40,12 @@ class GBDTClassifier(GBDT, ClassifierMixin):
colsample: float
percentage of features to use at each node split
level: float
confidence level for prediction sets (default=None)
pi_method: str
method for constructing the prediction sets: 'icp' (inductive conformal) or 'tcp' (transductive conformal) (default='icp')
verbose: int
controls verbosity (default=0)
Expand Down Expand Up @@ -89,7 +95,6 @@ class GBDTClassifier(GBDT, ClassifierMixin):
print(f"Classification Accuracy lightgbm: {accuracy3:.2f}")
```
"""

def __init__(
self,
model_type="xgboost",
Expand All @@ -98,6 +103,8 @@ def __init__(
max_depth=3,
rowsample=1.0,
colsample=1.0,
level=None,
pi_method="icp",
verbose=0,
seed=123,
**kwargs,
Expand All @@ -112,21 +119,46 @@ def __init__(
max_depth=max_depth,
rowsample=rowsample,
colsample=colsample,
level=level,
pi_method=pi_method,
verbose=verbose,
seed=seed,
**kwargs,
)

if model_type == "xgboost":
self.model = XGBClassifier(**self.params)
elif model_type == "catboost":
self.model = CatBoostClassifier(**self.params)
elif model_type == "lightgbm":
self.model = LGBMClassifier(**self.params)
elif model_type == "gradientboosting":
self.model = GradientBoostingClassifier(**self.params)
if self.level is not None:

if model_type == "xgboost":
self.model = PredictionSet(XGBClassifier(**self.params),
level=self.level,
method=self.pi_method)
elif model_type == "catboost":
self.model = PredictionSet(CatBoostClassifier(**self.params),
level=self.level,
method=self.pi_method)
elif model_type == "lightgbm":
self.model = PredictionSet(LGBMClassifier(**self.params),
level=self.level,
method=self.pi_method)
elif model_type == "gradientboosting":
self.model = PredictionSet(GradientBoostingClassifier(**self.params),
level=self.level,
method=self.pi_method)
else:
raise ValueError(f"Unknown model_type: {model_type}")

else:
raise ValueError(f"Unknown model_type: {model_type}")

if model_type == "xgboost":
self.model = XGBClassifier(**self.params)
elif model_type == "catboost":
self.model = CatBoostClassifier(**self.params)
elif model_type == "lightgbm":
self.model = LGBMClassifier(**self.params)
elif model_type == "gradientboosting":
self.model = GradientBoostingClassifier(**self.params)
else:
raise ValueError(f"Unknown model_type: {model_type}")

def predict_proba(self, X):
"""Predict probabilities for test data X.
Expand All @@ -144,5 +176,4 @@ def predict_proba(self, X):
probability estimates for test data: {array-like}
"""

return self.model.predict_proba(X)
57 changes: 44 additions & 13 deletions unifiedbooster/gbdt_regression.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
from .gbdt import GBDT
from sklearn.base import RegressorMixin
from .predictioninterval import PredictionInterval
from .nonconformist import RegressorAdapter, IcpRegressor, RegressorNc, RegressorNormalizer, QuantileRegErrFunc

try:
from xgboost import XGBRegressor
Expand Down Expand Up @@ -41,6 +40,12 @@ class GBDTRegressor(GBDT, RegressorMixin):
colsample: float
percentage of features to use at each node split
level: float
confidence level for prediction intervals (default=None)
pi_method: str
method for constructing the prediction intervals: 'splitconformal' or 'localconformal' (default='splitconformal')
verbose: int
controls verbosity (default=0)
Expand Down Expand Up @@ -90,7 +95,6 @@ class GBDTRegressor(GBDT, RegressorMixin):
print(f"Regression Mean Squared Error lightgbm: {mse3:.2f}")
```
"""

def __init__(
self,
model_type="xgboost",
Expand All @@ -99,12 +103,14 @@ def __init__(
max_depth=3,
rowsample=1.0,
colsample=1.0,
level=None,
pi_method="splitconformal",
verbose=0,
seed=123,
**kwargs,
):

self.type_fit = "regression"
self.type_fit = "regression"

super().__init__(
model_type=model_type,
Expand All @@ -113,18 +119,43 @@ def __init__(
max_depth=max_depth,
rowsample=rowsample,
colsample=colsample,
level=level,
pi_method=pi_method,
verbose=verbose,
seed=seed,
**kwargs,
)

if model_type == "xgboost":
self.model = XGBRegressor(**self.params)
elif model_type == "catboost":
self.model = CatBoostRegressor(**self.params)
elif model_type == "lightgbm":
self.model = LGBMRegressor(**self.params)
elif model_type == "gradientboosting":
self.model = GradientBoostingRegressor(**self.params)
else:
raise ValueError(f"Unknown model_type: {model_type}")
if self.level is not None:

if model_type == "xgboost":
self.model = PredictionInterval(XGBRegressor(**self.params),
level=self.level,
method=self.pi_method)
elif model_type == "catboost":
self.model = PredictionInterval(CatBoostRegressor(**self.params),
level=self.level,
method=self.pi_method)
elif model_type == "lightgbm":
self.model = PredictionInterval(LGBMRegressor(**self.params),
level=self.level,
method=self.pi_method)
elif model_type == "gradientboosting":
self.model = PredictionInterval(GradientBoostingRegressor(**self.params),
level=self.level,
method=self.pi_method)
else:
raise ValueError(f"Unknown model_type: {model_type}")

else:

if model_type == "xgboost":
self.model = XGBRegressor(**self.params)
elif model_type == "catboost":
self.model = CatBoostRegressor(**self.params)
elif model_type == "lightgbm":
self.model = LGBMRegressor(**self.params)
elif model_type == "gradientboosting":
self.model = GradientBoostingRegressor(**self.params)
else:
raise ValueError(f"Unknown model_type: {model_type}")
8 changes: 4 additions & 4 deletions unifiedbooster/nonconformist/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import abc
import numpy as np

from sklearn.base import BaseEstimator
from sklearn.base import BaseEstimator, RegressorMixin, ClassifierMixin


class RegressorMixin(object):
Expand Down Expand Up @@ -102,15 +102,15 @@ def _underlying_predict(self, x):
pass


class ClassifierAdapter(BaseModelAdapter):
class ClassifierAdapter(BaseModelAdapter, ClassifierMixin):
def __init__(self, model, fit_params=None):
super(ClassifierAdapter, self).__init__(model, fit_params)

def _underlying_predict(self, x):
return self.model.predict_proba(x)



class RegressorAdapter(BaseModelAdapter):
class RegressorAdapter(BaseModelAdapter, RegressorMixin):
def __init__(self, model, fit_params=None):
super(RegressorAdapter, self).__init__(model, fit_params)

Expand Down

0 comments on commit 8ed3574

Please sign in to comment.