Skip to content

Commit b2a00ed

Browse files
authored
Merge pull request #126 from perib/new_search_space_def
more fixes with search spaces - wrapper, make sure all supported modu…
2 parents 450a7e5 + ef42226 commit b2a00ed

File tree

6 files changed

+71
-50
lines changed

6 files changed

+71
-50
lines changed

.gitignore

+2-1
Original file line numberDiff line numberDiff line change
@@ -13,4 +13,5 @@ dask-worker-space/
1313
target/
1414
.venv/
1515
build/*
16-
*.egg
16+
*.egg
17+
*.coverage*

tpot2/config/classifiers_sklearnex.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
from ConfigSpace import ConfigurationSpace
22
from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal
3-
3+
from ..search_spaces.nodes.estimator_node import NONE_SPECIAL_STRING, TRUE_SPECIAL_STRING, FALSE_SPECIAL_STRING
44

55
def get_RandomForestClassifier_ConfigurationSpace(random_state):
66
space = {
@@ -66,10 +66,9 @@ def get_NuSVC_ConfigurationSpace(random_state):
6666
space = {
6767
'nu': Float("nu", bounds=(0.05, 1.0)),
6868
'kernel': Categorical("kernel", ['poly', 'rbf', 'linear', 'sigmoid']),
69-
'C': Float("C", bounds=(1e-4, 25), log=True),
69+
#'C': Float("C", bounds=(1e-4, 25), log=True),
7070
'degree': Integer("degree", bounds=(1, 4)),
71-
#TODO work around for None value?
72-
#'class_weight': Categorical("class_weight", [None, 'balanced']),
71+
'class_weight': Categorical("class_weight", [NONE_SPECIAL_STRING, 'balanced']),
7372
'max_iter': 3000,
7473
'tol': 0.005,
7574
'probability': Categorical("probability", [True]), # configspace doesn't allow bools as a default value? but does allow them as a value inside a Categorical

tpot2/config/get_configspace.py

+43-38
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import sys
33
import numpy as np
44
import warnings
5+
import importlib.util
56

67
from ..search_spaces.nodes import EstimatorNode
78
from ..search_spaces.pipelines import ChoicePipeline, WrapperPipeline
@@ -27,7 +28,7 @@
2728

2829
from sklearn.linear_model import SGDClassifier
2930
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier
30-
from sklearn.neural_network import MLPClassifier
31+
from sklearn.neural_network import MLPClassifier, MLPRegressor
3132
from sklearn.tree import DecisionTreeClassifier
3233
from xgboost import XGBClassifier
3334
from sklearn.neighbors import KNeighborsClassifier
@@ -101,51 +102,64 @@
101102
from tpot2.builtin_modules import AddTransformer, mul_neg_1_Transformer, MulTransformer, SafeReciprocalTransformer, EQTransformer, NETransformer, GETransformer, GTTransformer, LETransformer, LTTransformer, MinTransformer, MaxTransformer, ZeroTransformer, OneTransformer, NTransformer
102103

103104

105+
from tpot2.builtin_modules.genetic_encoders import DominantEncoder, RecessiveEncoder, HeterosisEncoder, UnderDominanceEncoder, OverDominanceEncoder
106+
104107
#MDR
105108

106109

107110
all_methods = [SGDClassifier, RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier, MLPClassifier, DecisionTreeClassifier, XGBClassifier, KNeighborsClassifier, SVC, LogisticRegression, LGBMClassifier, LinearSVC, GaussianNB, BernoulliNB, MultinomialNB, ExtraTreesRegressor, RandomForestRegressor, GradientBoostingRegressor, BaggingRegressor, DecisionTreeRegressor, KNeighborsRegressor, XGBRegressor, ZeroCount, OneHotEncoder, ColumnOneHotEncoder, Binarizer, FastICA, FeatureAgglomeration, MaxAbsScaler, MinMaxScaler, Normalizer, Nystroem, PCA, PolynomialFeatures, RBFSampler, RobustScaler, StandardScaler, SelectFwe, SelectPercentile, VarianceThreshold, SGDRegressor, Ridge, Lasso, ElasticNet, Lars, LassoLars, LassoLarsCV, RidgeCV, SVR, LinearSVR, AdaBoostRegressor, GradientBoostingRegressor, RandomForestRegressor, BaggingRegressor, ExtraTreesRegressor, DecisionTreeRegressor, KNeighborsRegressor, ElasticNetCV,
108-
AdaBoostClassifier,
111+
AdaBoostClassifier,MLPRegressor,
109112
GaussianProcessRegressor, HistGradientBoostingClassifier, HistGradientBoostingRegressor,
110113
AddTransformer, mul_neg_1_Transformer, MulTransformer, SafeReciprocalTransformer, EQTransformer, NETransformer, GETransformer, GTTransformer, LETransformer, LTTransformer, MinTransformer, MaxTransformer, ZeroTransformer, OneTransformer, NTransformer,
111114
PowerTransformer, QuantileTransformer,ARDRegression, QuadraticDiscriminantAnalysis, PassiveAggressiveClassifier, LinearDiscriminantAnalysis,
115+
DominantEncoder, RecessiveEncoder, HeterosisEncoder, UnderDominanceEncoder, OverDominanceEncoder,
112116
]
113117

114118

115119
#if mdr is installed
116-
if 'mdr' in sys.modules:
120+
if importlib.util.find_spec('mdr') is not None:
117121
from mdr import MDR, ContinuousMDR
118122
all_methods.append(MDR)
119123
all_methods.append(ContinuousMDR)
120124

121-
if 'skrebate' in sys.modules:
125+
if importlib.util.find_spec('skrebate') is not None:
122126
from skrebate import ReliefF, SURF, SURFstar, MultiSURF
123127
all_methods.append(ReliefF)
124128
all_methods.append(SURF)
125129
all_methods.append(SURFstar)
126130
all_methods.append(MultiSURF)
127131

128-
if 'sklearnex' in sys.modules:
132+
STRING_TO_CLASS = {
133+
t.__name__: t for t in all_methods
134+
}
135+
136+
if importlib.util.find_spec('sklearnex') is not None:
129137
import sklearnex
138+
import sklearnex.linear_model
139+
import sklearnex.svm
140+
import sklearnex.ensemble
141+
import sklearnex.neighbors
130142

131-
all_methods.append(sklearnex.linear_model.LinearRegression)
132-
all_methods.append(sklearnex.linear_model.Ridge)
133-
all_methods.append(sklearnex.linear_model.Lasso)
134-
all_methods.append(sklearnex.linear_model.ElasticNet)
135-
all_methods.append(sklearnex.svm.SVR)
136-
all_methods.append(sklearnex.svm.NuSVR)
137-
all_methods.append(sklearnex.ensemble.RandomForestRegressor)
138-
all_methods.append(sklearnex.neighbors.KNeighborsRegressor)
139-
all_methods.append(sklearnex.ensemble.RandomForestClassifier)
140-
all_methods.append(sklearnex.neighbors.KNeighborsClassifier)
141-
all_methods.append(sklearnex.svm.SVC)
142-
all_methods.append(sklearnex.svm.NuSVC)
143-
all_methods.append(sklearnex.linear_model.LogisticRegression)
143+
144+
sklearnex_methods = []
145+
146+
sklearnex_methods.append(sklearnex.linear_model.LinearRegression)
147+
sklearnex_methods.append(sklearnex.linear_model.Ridge)
148+
sklearnex_methods.append(sklearnex.linear_model.Lasso)
149+
sklearnex_methods.append(sklearnex.linear_model.ElasticNet)
150+
sklearnex_methods.append(sklearnex.svm.SVR)
151+
sklearnex_methods.append(sklearnex.svm.NuSVR)
152+
sklearnex_methods.append(sklearnex.ensemble.RandomForestRegressor)
153+
sklearnex_methods.append(sklearnex.neighbors.KNeighborsRegressor)
154+
sklearnex_methods.append(sklearnex.ensemble.RandomForestClassifier)
155+
sklearnex_methods.append(sklearnex.neighbors.KNeighborsClassifier)
156+
sklearnex_methods.append(sklearnex.svm.SVC)
157+
sklearnex_methods.append(sklearnex.svm.NuSVC)
158+
sklearnex_methods.append(sklearnex.linear_model.LogisticRegression)
159+
160+
STRING_TO_CLASS.update({f"{t.__name__}_sklearnex": t for t in sklearnex_methods})
144161

145162

146-
STRING_TO_CLASS = {
147-
t.__name__: t for t in all_methods
148-
}
149163

150164

151165

@@ -439,15 +453,6 @@ def get_search_space(name, n_classes=3, n_samples=100, n_features=100, random_st
439453
if name in GROUPNAMES:
440454
name_list = GROUPNAMES[name]
441455
return get_search_space(name_list, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state)
442-
443-
if name is None:
444-
warnings.warn(f"name is None")
445-
return None
446-
447-
if name not in STRING_TO_CLASS:
448-
print("FOOO ", name)
449-
warnings.warn(f"Could not find class for {name}")
450-
return None
451456

452457
return get_node(name, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state)
453458

@@ -458,21 +463,21 @@ def get_node(name, n_classes=3, n_samples=100, n_features=100, random_state=None
458463
# TODO Add AdaBoostRegressor, AdaBoostClassifier as wrappers? wrap a decision tree with different params?
459464
# TODO add other meta-estimators?
460465
if name == "RFE_classification":
461-
rfe_sp = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state)
466+
rfe_sp = get_configspace(name="RFE", n_classes=n_classes, n_samples=n_samples, random_state=random_state)
462467
ext = get_node("ExtraTreesClassifier", n_classes=n_classes, n_samples=n_samples, random_state=random_state)
463-
return WrapperPipeline(nodegen=ext, method=RFE, configspace=rfe_sp)
468+
return WrapperPipeline(nodegen=ext, method=RFE, space=rfe_sp)
464469
if name == "RFE_regression":
465-
rfe_sp = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state)
470+
rfe_sp = get_configspace(name="RFE", n_classes=n_classes, n_samples=n_samples, random_state=random_state)
466471
ext = get_node("ExtraTreesRegressor", n_classes=n_classes, n_samples=n_samples, random_state=random_state)
467-
return WrapperPipeline(nodegen=ext, method=RFE, configspace=rfe_sp)
472+
return WrapperPipeline(nodegen=ext, method=RFE, space=rfe_sp)
468473
if name == "SelectFromModel_classification":
469-
sfm_sp = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state)
474+
sfm_sp = get_configspace(name="SelectFromModel", n_classes=n_classes, n_samples=n_samples, random_state=random_state)
470475
ext = get_node("ExtraTreesClassifier", n_classes=n_classes, n_samples=n_samples, random_state=random_state)
471-
return WrapperPipeline(nodegen=ext, method=SelectFromModel, configspace=sfm_sp)
476+
return WrapperPipeline(nodegen=ext, method=SelectFromModel, space=sfm_sp)
472477
if name == "SelectFromModel_regression":
473-
sfm_sp = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state)
478+
sfm_sp = get_configspace(name="SelectFromModel", n_classes=n_classes, n_samples=n_samples, random_state=random_state)
474479
ext = get_node("ExtraTreesRegressor", n_classes=n_classes, n_samples=n_samples, random_state=random_state)
475-
return WrapperPipeline(nodegen=ext, method=SelectFromModel, configspace=sfm_sp)
480+
return WrapperPipeline(nodegen=ext, method=SelectFromModel, space=sfm_sp)
476481

477482
#these are nodes that have special search spaces which require custom parsing of the hyperparameters
478483
if name == "RobustScaler":

tpot2/config/tests/test_get_configspace.py

+18-2
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
import tpot2.config
88

9-
from ..get_configspace import STRING_TO_CLASS
9+
from ..get_configspace import STRING_TO_CLASS, GROUPNAMES
1010

1111
def test_loop_through_all_hyperparameters():
1212

@@ -22,4 +22,20 @@ def test_loop_through_all_hyperparameters():
2222
for i in range(1):
2323
estnode = estnode_gen.generate()
2424
est = estnode.export_pipeline()
25-
25+
26+
def test_loop_through_groupnames():
27+
28+
n_classes=3
29+
n_samples=100
30+
n_features=100
31+
random_state=None
32+
33+
for groupname, group in GROUPNAMES.items():
34+
for class_name in group:
35+
print(class_name)
36+
estnode_gen = tpot2.config.get_search_space(class_name, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state)
37+
38+
#generate 100 random hyperparameters and make sure they are all valid
39+
for i in range(100):
40+
estnode = estnode_gen.generate()
41+
est = estnode.export_pipeline()

tpot2/search_spaces/nodes/estimator_node.py

+2-2
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def __init__(self, method: type,
4545
else:
4646
rng = np.random.default_rng(rng)
4747
self.space.seed(rng.integers(0, 2**32))
48-
self.hyperparameters = self.space.sample_configuration().get_dictionary()
48+
self.hyperparameters = dict(self.space.sample_configuration())
4949

5050
self.check_hyperparameters_for_None()
5151

@@ -55,7 +55,7 @@ def mutate(self, rng=None):
5555

5656
rng = np.random.default_rng(rng)
5757
self.space.seed(rng.integers(0, 2**32))
58-
self.hyperparameters = self.space.sample_configuration().get_dictionary()
58+
self.hyperparameters = dict(self.space.sample_configuration())
5959

6060
self.check_hyperparameters_for_None()
6161
return True

tpot2/search_spaces/pipelines/wrapper.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,14 @@ def __init__(self,
2121
super().__init__()
2222

2323
self.nodegen = nodegen
24-
self.node = np.random.default_rng(rng).choice(self.nodegen).generate()
24+
self.node = self.nodegen.generate(rng)
2525

2626

2727
self.method = method
2828
self.space = space
2929
rng = np.random.default_rng(rng)
3030
self.space.seed(rng.integers(0, 2**32))
31-
self.hyperparameters = self.space.sample_configuration().get_dictionary()
31+
self.hyperparameters = dict(self.space.sample_configuration())
3232

3333

3434

@@ -43,7 +43,7 @@ def mutate(self, rng=None):
4343
def _mutate_hyperparameters(self, rng=None):
4444
rng = np.random.default_rng(rng)
4545
self.space.seed(rng.integers(0, 2**32))
46-
self.hyperparameters = self.space.sample_configuration().get_dictionary()
46+
self.hyperparameters = dict(self.space.sample_configuration())
4747
return True
4848

4949
def _mutate_node(self, rng=None):

0 commit comments

Comments
 (0)