Skip to content

Commit

Permalink
conformal GBDT v0.7.0 Pt.3
Browse files Browse the repository at this point in the history
  • Loading branch information
thierrymoudiki committed Sep 2, 2024
1 parent 4d1c79d commit 9028db8
Show file tree
Hide file tree
Showing 6 changed files with 73 additions and 52 deletions.
1 change: 1 addition & 0 deletions unifiedbooster/gbdt.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ class GBDT(BaseEstimator):
**kwargs: dict
additional parameters to be passed to the class
"""

def __init__(
self,
model_type="xgboost",
Expand Down
39 changes: 24 additions & 15 deletions unifiedbooster/gbdt_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,10 +40,10 @@ class GBDTClassifier(GBDT, ClassifierMixin):
colsample: float
percentage of features to use at each node split
level: float
confidence level for prediction sets
pi_method: str
method for constructing the prediction sets: 'icp' (inductive conformal), 'tcp' (transductive conformal)
Expand Down Expand Up @@ -95,6 +95,7 @@ class GBDTClassifier(GBDT, ClassifierMixin):
print(f"Classification Accuracy lightgbm: {accuracy3:.2f}")
```
"""

def __init__(
self,
model_type="xgboost",
Expand Down Expand Up @@ -129,24 +130,32 @@ def __init__(
if self.level is not None:

if model_type == "xgboost":
self.model = PredictionSet(XGBClassifier(**self.params),
level=self.level,
method=self.pi_method)
self.model = PredictionSet(
XGBClassifier(**self.params),
level=self.level,
method=self.pi_method,
)
elif model_type == "catboost":
self.model = PredictionSet(CatBoostClassifier(**self.params),
level=self.level,
method=self.pi_method)
self.model = PredictionSet(
CatBoostClassifier(**self.params),
level=self.level,
method=self.pi_method,
)
elif model_type == "lightgbm":
self.model = PredictionSet(LGBMClassifier(**self.params),
level=self.level,
method=self.pi_method)
self.model = PredictionSet(
LGBMClassifier(**self.params),
level=self.level,
method=self.pi_method,
)
elif model_type == "gradientboosting":
self.model = PredictionSet(GradientBoostingClassifier(**self.params),
level=self.level,
method=self.pi_method)
self.model = PredictionSet(
GradientBoostingClassifier(**self.params),
level=self.level,
method=self.pi_method,
)
else:
raise ValueError(f"Unknown model_type: {model_type}")

else:

if model_type == "xgboost":
Expand Down
43 changes: 26 additions & 17 deletions unifiedbooster/gbdt_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ class GBDTRegressor(GBDT, RegressorMixin):
colsample: float
percentage of features to use at each node split
level: float
confidence level for prediction intervals
Expand Down Expand Up @@ -95,6 +95,7 @@ class GBDTRegressor(GBDT, RegressorMixin):
print(f"Regression Mean Squared Error lightgbm: {mse3:.2f}")
```
"""

def __init__(
self,
model_type="xgboost",
Expand All @@ -110,7 +111,7 @@ def __init__(
**kwargs,
):

self.type_fit = "regression"
self.type_fit = "regression"

super().__init__(
model_type=model_type,
Expand All @@ -129,26 +130,34 @@ def __init__(
if self.level is not None:

if model_type == "xgboost":
self.model = PredictionInterval(XGBRegressor(**self.params),
level=self.level,
method=self.pi_method)
self.model = PredictionInterval(
XGBRegressor(**self.params),
level=self.level,
method=self.pi_method,
)
elif model_type == "catboost":
self.model = PredictionInterval(CatBoostRegressor(**self.params),
level=self.level,
method=self.pi_method)
self.model = PredictionInterval(
CatBoostRegressor(**self.params),
level=self.level,
method=self.pi_method,
)
elif model_type == "lightgbm":
self.model = PredictionInterval(LGBMRegressor(**self.params),
level=self.level,
method=self.pi_method)
self.model = PredictionInterval(
LGBMRegressor(**self.params),
level=self.level,
method=self.pi_method,
)
elif model_type == "gradientboosting":
self.model = PredictionInterval(GradientBoostingRegressor(**self.params),
level=self.level,
method=self.pi_method)
self.model = PredictionInterval(
GradientBoostingRegressor(**self.params),
level=self.level,
method=self.pi_method,
)
else:
raise ValueError(f"Unknown model_type: {model_type}")
else:

else:

if model_type == "xgboost":
self.model = XGBRegressor(**self.params)
elif model_type == "catboost":
Expand Down
4 changes: 2 additions & 2 deletions unifiedbooster/nonconformist/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,9 +28,9 @@
"RegressorAdapter",
"ClassifierAdapter",
"RegressorNc",
"ClassifierNc",
"ClassifierNc",
"RegressorNormalizer",
"IcpRegressor",
"IcpClassifier",
"TcpClassifier"
"TcpClassifier",
]
2 changes: 1 addition & 1 deletion unifiedbooster/nonconformist/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,7 +108,7 @@ def __init__(self, model, fit_params=None):

def _underlying_predict(self, x):
return self.model.predict_proba(x)


class RegressorAdapter(BaseModelAdapter, RegressorMixin):
def __init__(self, model, fit_params=None):
Expand Down
36 changes: 19 additions & 17 deletions unifiedbooster/predictionset/predictionset.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,13 @@
from sklearn.preprocessing import StandardScaler
from scipy.stats import gaussian_kde
from tqdm import tqdm
from ..nonconformist import ClassifierAdapter, IcpClassifier, TcpClassifier, ClassifierNc, MarginErrFunc
from ..nonconformist import (
ClassifierAdapter,
IcpClassifier,
TcpClassifier,
ClassifierNc,
MarginErrFunc,
)


class PredictionSet(BaseEstimator, ClassifierMixin):
Expand Down Expand Up @@ -47,21 +53,18 @@ def __init__(
self.alpha_ = 1 - self.level / 100
self.quantile_ = None
self.icp_ = None
self.tcp_ = None
self.tcp_ = None

if self.method == "icp":
self.icp_ = IcpClassifier(
self.icp_ = IcpClassifier(
ClassifierNc(ClassifierAdapter(self.obj), MarginErrFunc()),
)
elif self.method == "tcp":
self.tcp_ = TcpClassifier(
self.tcp_ = TcpClassifier(
ClassifierNc(ClassifierAdapter(self.obj), MarginErrFunc()),
)
else:
raise ValueError(
"`self.method` must be in ('icp', 'tcp')"
)

else:
raise ValueError("`self.method` must be in ('icp', 'tcp')")

def fit(self, X, y):
"""Fit the `method` to training data (X, y).
Expand All @@ -74,13 +77,14 @@ def fit(self, X, y):
y: array-like, shape = [n_samples, ]; Target values.
"""
"""
if self.method == "icp":

X_train, X_calibration, y_train, y_calibration = train_test_split(
X, y, test_size=0.5, random_state=self.seed)
X, y, test_size=0.5, random_state=self.seed
)
self.icp_.fit(X_train, y_train)
self.icp_.calibrate(X_calibration, y_calibration)
self.icp_.calibrate(X_calibration, y_calibration)

elif self.method == "tcp":

Expand All @@ -101,11 +105,9 @@ def predict(self, X):

if self.method == "icp":
return self.icp_.predict(X, significance=self.alpha_)

elif self.method == "tcp":
return self.tcp_.predict(X, significance=self.alpha_)

else:
raise ValueError(
"`self.method` must be in ('icp', 'tcp')"
)
raise ValueError("`self.method` must be in ('icp', 'tcp')")

0 comments on commit 9028db8

Please sign in to comment.