Commit 44d9882

Merge pull request #9 from Techtonique/splitconf
Splitconf
2 parents 29a8b48 + 0c9321d commit 44d9882

File tree

7 files changed: +212 -72 lines changed

GPopt/GPOpt.py: +126 -60
@@ -80,6 +80,10 @@ class GPOpt:
 
         acquisition: a string;
             acquisition function: "ei" (expected improvement) or "ucb" (upper confidence bound)
+
+        method: an str;
+            "bayesian" (default) for Gaussian posteriors, "mc" for Monte Carlo posteriors,
+            "splitconformal" for conformalized surrogates
 
         min_value: a float;
             minimum value of the objective function (default is None). For example,
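
Note: the new `method` argument is threaded through the constructor below. A minimal usage sketch consistent with this diff — the objective, bounds, and RidgeCV surrogate are illustrative, not part of the commit:

import numpy as np
import nnetsauce as ns
import GPopt as gp
from sklearn.linear_model import RidgeCV

# toy objective to minimize (illustrative only)
def obj(x):
    return (x[0] - 1.0) ** 2 + (x[1] + 0.5) ** 2

opt = gp.GPOpt(
    objective_func=obj,
    lower_bound=np.array([-5.0, -5.0]),
    upper_bound=np.array([5.0, 5.0]),
    method="splitconformal",  # new in this commit
    acquisition="ucb",        # required for conformalized surrogates (see below)
    surrogate_obj=ns.PredictionInterval(RidgeCV(), type_pi="splitconformal"),
)
opt.optimize(verbose=1)
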
@@ -114,6 +118,7 @@ def __init__(
         save=None,
         n_jobs=None,
         acquisition="ei",
+        method="bayesian",
         min_value=None,
         per_second=False, # /!\ very experimental
         log_scale=False, # /!\ experimental
@@ -145,8 +150,10 @@ def __init__(
         self.y_min = None
         self.y_mean = None
         self.y_std = None
+        self.y_lower = None
+        self.y_upper = None
         self.best_surrogate = None
-        self.acquisition = acquisition
+        self.acquisition = acquisition
         self.min_value = min_value
         self.acq = np.array([])
         self.max_acq = []
@@ -160,7 +167,12 @@ def __init__(
             )
         else:
             self.surrogate_obj = surrogate_obj
-        self.method = None
+        assert method in (
+            "bayesian",
+            "mc",
+            "splitconformal"
+        ), "method must be in ('bayesian', 'mc', 'splitconformal')"
+        self.method = method
         self.posterior_ = None
 
         # Sobol seqs for initial design and choices
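
The guard added above fails fast on an unsupported `method`. A quick sketch (the "bootstrap" value is deliberately invalid):

import numpy as np
import GPopt as gp

try:
    gp.GPOpt(
        objective_func=lambda x: float(np.sum(x ** 2)),
        lower_bound=np.array([-1.0]),
        upper_bound=np.array([1.0]),
        method="bootstrap",  # not an allowed value
    )
except AssertionError as e:
    print(e)  # method must be in ('bayesian', 'mc', 'splitconformal')
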
@@ -299,23 +311,42 @@ def surrogate_fit_predict(
         assert (
             return_std == True and return_pi == True
         ) == False, "must have either return_std == True or return_pi == True"
+
         if return_std == True:
+
             self.posterior_ = "gaussian"
             return self.surrogate_obj.fit(X_train, y_train).predict(
                 X_test, return_std=True
             )
-        elif return_pi == True:
-            self.posterior_ = "mc"
-            res = self.surrogate_obj.fit(X_train, y_train).predict(
-                X_test, return_pi=True, method="splitconformal"
-            )
-            self.y_sims = res.sims
-            self.y_mean, self.y_std = (
-                np.mean(self.y_sims, axis=1),
-                np.std(self.y_sims, axis=1),
-            )
-            return self.y_mean, self.y_std, self.y_sims
+
+        elif return_pi == True: # here, self.surrogate_obj must have `replications` not None
+
+            if self.surrogate_obj.replications is not None:
+
+                self.posterior_ = "mc"
+                res = self.surrogate_obj.fit(X_train, y_train).predict(
+                    X_test, return_pi=True, method="splitconformal"
+                )
+                self.y_sims = res.sims
+                self.y_mean, self.y_std = (
+                    np.mean(self.y_sims, axis=1),
+                    np.std(self.y_sims, axis=1),
+                )
+                return self.y_mean, self.y_std, self.y_sims
+
+            else: # self.surrogate_obj is conformalized (uses nnetsauce.PredictionInterval)
+
+                assert self.acquisition == "ucb", "'acquisition' must be 'ucb' for conformalized surrogates"
+                self.posterior_ = None
+                res = self.surrogate_obj.fit(X_train, y_train).predict(
+                    X_test, return_pi=True)
+                self.y_mean = res.mean
+                self.y_lower = res.lower
+                self.y_upper = res.upper
+                return self.y_mean, self.y_lower, self.y_upper
+
         else:
+
             raise NotImplementedError
 
         # fit predict timings
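
To summarize the dispatch above: `surrogate_fit_predict` now returns (mean, std) for a Gaussian surrogate, (mean, std, sims) for a Monte Carlo surrogate with `replications` set, and (mean, lower, upper) for a conformalized surrogate. A standalone sketch of the conformal branch, mirroring the diff's use of nnetsauce — the Ridge surrogate and toy data are illustrative:

import numpy as np
import nnetsauce as ns
from sklearn.linear_model import Ridge

rng = np.random.default_rng(123)
X = rng.uniform(size=(25, 2))
y = X[:, 0] ** 2 + 0.1 * rng.standard_normal(25)

surrogate = ns.PredictionInterval(Ridge(), type_pi="splitconformal")
res = surrogate.fit(X, y).predict(rng.uniform(size=(5, 2)), return_pi=True)
# res.mean, res.lower, res.upper mirror the (y_mean, y_lower, y_upper)
# triple returned by the conformal branch above
print(res.mean, res.lower, res.upper)
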
@@ -332,6 +363,7 @@ def timings_fit_predict(self, X_train, y_train, X_test):
     def next_parameter_by_acq(self, i, acq="ei"):
 
         if acq == "ei":
+
             if self.posterior_ == "gaussian":
                 gamma_hat = (self.y_min - self.y_mean) / self.y_std
                 self.acq = -self.y_std * (
@@ -343,7 +375,15 @@ def next_parameter_by_acq(self, i, acq="ei"):
             )
 
         if acq == "ucb":
-            self.acq = -(self.y_mean - 1.96 * self.y_std)
+
+            if self.posterior_ == "gaussian":
+
+                self.acq = (self.y_mean - 1.96 * self.y_std)
+
+            elif self.posterior_ is None: # split conformal(ized) estimator
+
+                self.acq = self.y_lower
+
 
         # find max index -----
 
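A standalone rendering of the dispatch above (the function name is illustrative; the signs and the 1.96 constant follow the diff):

import numpy as np

def ucb_acq(y_mean, y_std=None, y_lower=None, posterior="gaussian"):
    if posterior == "gaussian":
        # Gaussian surrogate: mean - 1.96 * std, as in the diff
        return y_mean - 1.96 * y_std
    # posterior is None (split conformal surrogate): the conformal
    # lower prediction bound plays the role of the confidence bound
    return y_lower
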
@@ -404,8 +444,7 @@ def optimize(
         n_more_iter=None,
         abs_tol=None, # suggested 1e-4, for n_iter = 200
         min_budget=50, # minimum budget for early stopping
-        func_args=None,
-        method="bayesian",
+        func_args=None,
     ):
         """Launch optimization loop.
 
@@ -426,22 +465,13 @@
             minimum number of iterations before early stopping controlled by `abs_tol`
 
         func_args: a list;
-            additional parameters for the objective function (if necessary)
-
-        method: an str;
-            "bayesian" (default) for Gaussian posteriors or "mc" for Monte Carlo posteriors
+            additional parameters for the objective function (if necessary)
 
         see also [Bayesian Optimization with GPopt](https://thierrymoudiki.github.io/blog/2021/04/16/python/misc/gpopt)
         and [Hyperparameters tuning with GPopt](https://thierrymoudiki.github.io/blog/2021/06/11/python/misc/hyperparam-tuning-gpopt)
 
         """
 
-        assert method in (
-            "bayesian",
-            "mc",
-        ), "method must be in ('bayesian', 'mc')"
-        self.method = method
-
         # verbose = 0: nothing is printed
         # verbose = 1: a progress bar is printed (longer than 0)
         # verbose = 2: information about each iteration is printed (longer than 1)
@@ -554,7 +584,7 @@ def optimize(
 
         # current gp mean and std on initial design
         # /!\ if GP
-        if self.method == "bayesian":
+        if self.method == "bayesian":
             self.posterior_ = "gaussian"
             try:
                 y_mean, y_std = self.surrogate_fit_predict(
@@ -573,12 +603,17 @@ def optimize(
                     return_pi=False,
                 )
                 y_mean, y_std = preds_with_std[0], preds_with_std[1]
+            self.y_mean = y_mean
+            self.y_std = np.maximum(2.220446049250313e-16, y_std)
+
 
         elif self.method == "mc":
             self.posterior_ = "mc"
             assert self.surrogate_obj.__class__.__name__.startswith(
                 "CustomRegressor"
-            ), "for `method = 'mc'`, the surrogate must be a nnetsauce.CustomRegressor()"
+            ) or self.surrogate_obj.__class__.__name__.startswith(
+                "PredictionInterval"
+            ), "for `method = 'mc'`, the surrogate must be a nnetsauce.CustomRegressor() or nnetsauce.PredictionInterval()"
             assert (
                 self.surrogate_obj.replications is not None
             ), "for `method = 'mc'`, the surrogate must be a nnetsauce.CustomRegressor() with a number of 'replications' provided"
@@ -590,9 +625,23 @@ def optimize(
                 return_pi=True,
             )
             y_mean, y_std = preds_with_std[0], preds_with_std[1]
-
-            self.y_mean = y_mean
-            self.y_std = np.maximum(2.220446049250313e-16, y_std)
+            self.y_mean = y_mean
+            self.y_std = np.maximum(2.220446049250313e-16, y_std)
+
+        elif self.method == "splitconformal":
+            self.posterior_ = None
+            assert self.surrogate_obj.__class__.__name__.startswith(
+                "PredictionInterval"
+            ), "for `method = 'splitconformal'`, the surrogate must be a nnetsauce.PredictionInterval()"
+            preds_with_pi = self.surrogate_fit_predict(
+                np.asarray(self.parameters),
+                np.asarray(self.scores),
+                self.x_choices,
+                return_std=False,
+                return_pi=True,
+            )
+            y_lower = preds_with_pi[1]
+            self.lower = y_lower
 
         # saving after initial design computation
         if self.save is not None:
631680

632681
for i in range(n_iter):
633682

634-
# find next set of parameters (vector), maximizing ei
635-
next_param = self.next_parameter_by_acq(i=i, acq="ei")
683+
# find next set of parameters (vector), maximizing acquisition function
684+
next_param = self.next_parameter_by_acq(i=i, acq=self.acquisition)
636685

637686
try:
638687

@@ -744,16 +793,17 @@ def optimize(
                     )
                 )
 
-            elif self.posterior_ == "mc" and self.method == "mc":
-                self.y_mean, self.y_std, self.y_sims = (
+            elif self.posterior_ in (None, "mc") and self.method in ("mc", "splitconformal"):
+                self.y_mean, self.y_lower, self.y_upper = (
                     self.surrogate_fit_predict(
                         np.asarray(self.parameters),
                         np.asarray(self.scores),
                         self.x_choices,
                         return_std=False,
                         return_pi=True,
                     )
-                )
+                )
+
             else:
                 return NotImplementedError
 
@@ -808,9 +858,8 @@ def lazyoptimize(
         abs_tol=None, # suggested 1e-4, for n_iter = 200
         min_budget=50, # minimum budget for early stopping
         func_args=None,
-        method="bayesian", # "bayesian" or "mc
         estimators="all",
-        type_pi="kde", # for now, 'kde' or 'bootstrap'
+        type_pi="kde", # for now, 'kde', 'bootstrap', 'splitconformal'
         type_exec="independent", # "queue" or "independent" (default)
     ):
         """Launch optimization loop.
@@ -834,15 +883,12 @@ def lazyoptimize(
         func_args: a list;
             additional parameters for the objective function (if necessary)
 
-        method: an str;
-            "bayesian" (default) for Gaussian posteriors or "mc" for Monte Carlo posteriors
-
         estimators: an str or a list of strs (estimators names)
             if "all", then 30 models are fitted. Otherwise, only those provided in the list
             are adjusted; for example ["RandomForestRegressor", "Ridge"]
 
         type_pi: an str;
-            "kde" (default) or "bootstrap"; type of prediction intervals for the surrogate
+            "kde" (default) or "splitconformal"; type of prediction intervals for the surrogate
             model
 
         type_exec: an str;
@@ -859,20 +905,40 @@ def lazyoptimize(
 
         else:
 
-            self.regressors = [
-                (
-                    "CustomRegressor(" + est[0] + ")",
-                    ns.CustomRegressor(
-                        est[1](), replications=150, type_pi=type_pi
-                    ),
-                )
-                for est in all_estimators()
-                if (
-                    issubclass(est[1], RegressorMixin)
-                    and (est[0] not in REMOVED_REGRESSORS)
-                    and (est[0] in estimators)
-                )
-            ]
+            if type_pi == "kde":
+
+                self.regressors = [
+                    (
+                        "CustomRegressor(" + est[0] + ")",
+                        ns.CustomRegressor(
+                            est[1](), replications=150, type_pi=type_pi
+                        ),
+                    )
+                    for est in all_estimators()
+                    if (
+                        issubclass(est[1], RegressorMixin)
+                        and (est[0] not in REMOVED_REGRESSORS)
+                        and (est[0] in estimators)
+                    )
+                ]
+
+            elif type_pi == "splitconformal":
+
+                self.regressors = [
+                    (
+                        est[0],
+                        ns.PredictionInterval(
+                            est[1](),
+                            type_pi="splitconformal"
+                        ),
+                    )
+                    for est in all_estimators()
+                    if (
+                        issubclass(est[1], RegressorMixin)
+                        and (est[0] not in REMOVED_REGRESSORS)
+                        and (est[0] in estimators)
+                    )
+                ]
 
         self.surrogate_fit_predict = partial(
             self.surrogate_fit_predict, return_pi=True
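
A standalone sketch of the new `splitconformal` branch above; REMOVED_REGRESSORS is GPopt-internal, so an empty stand-in is used here, and the estimator subset is illustrative:

import nnetsauce as ns
from sklearn.base import RegressorMixin
from sklearn.utils import all_estimators

REMOVED_REGRESSORS = []  # stand-in for GPopt's internal exclusion list
estimators = ["RidgeCV", "LassoCV"]  # illustrative subset

regressors = [
    (name, ns.PredictionInterval(cls(), type_pi="splitconformal"))
    for name, cls in all_estimators()
    if issubclass(cls, RegressorMixin)
    and name not in REMOVED_REGRESSORS
    and name in estimators
]
print([name for name, _ in regressors])
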
@@ -908,6 +974,7 @@ def lazyoptimize(
                 seed=self.seed,
                 n_jobs=self.n_jobs,
                 acquisition=self.acquisition,
+                method=self.method,
                 min_value=self.min_value,
                 surrogate_obj=copy.deepcopy(self.regressors[0][1]),
             )
@@ -917,7 +984,6 @@ def lazyoptimize(
                 abs_tol=abs_tol, # suggested 1e-4, for n_iter = 200
                 min_budget=min_budget, # minimum budget for early stopping
                 func_args=func_args,
-                method=method,
             )
 
             score_next_param = gp_opt_obj_prev.y_min
@@ -944,6 +1010,7 @@ def lazyoptimize(
                     seed=self.seed,
                     n_jobs=self.n_jobs,
                     acquisition=self.acquisition,
+                    method=self.method,
                     min_value=self.min_value,
                     surrogate_obj=copy.deepcopy(self.regressors[i][1]),
                     x_init=np.asarray(gp_opt_obj_prev.parameters),
@@ -955,7 +1022,6 @@ def lazyoptimize(
                     abs_tol=abs_tol, # suggested 1e-4, for n_iter = 200
                     min_budget=min_budget, # minimum budget for early stopping
                     func_args=func_args,
-                    method=method,
                 )
 
                 score_next_param = gp_opt_obj.y_min
@@ -1030,6 +1096,7 @@ def lazyoptimize(
                     seed=self.seed,
                     n_jobs=self.n_jobs,
                     acquisition=self.acquisition,
+                    method=self.method,
                     min_value=self.min_value,
                     surrogate_obj=copy.deepcopy(self.regressors[i][1]),
                 )
@@ -1039,7 +1106,6 @@ def lazyoptimize(
                     abs_tol=abs_tol, # suggested 1e-4, for n_iter = 200
                     min_budget=min_budget, # minimum budget for early stopping
                     func_args=func_args,
-                    method=method,
                 )
 
                 score_next_param = gp_opt_obj.y_min
@@ -1080,6 +1146,7 @@ def foo(i):
                 seed=self.seed,
                 n_jobs=self.n_jobs,
                 acquisition=self.acquisition,
+                method=self.method,
                 min_value=self.min_value,
                 surrogate_obj=copy.deepcopy(self.regressors[i][1]),
             )
@@ -1090,7 +1157,6 @@ def foo(i):
                 abs_tol=abs_tol, # suggested 1e-4, for n_iter = 200
                 min_budget=min_budget, # minimum budget for early stopping
                 func_args=func_args,
-                method=method,
             )
 
         return gp_opt_obj
