
Commit bbef3e4

Merge pull request #137 from perib/new_search_space_def
New search space def
2 parents fedc90d + 5425ee4 commit bbef3e4

6 files changed: +47 -11 lines changed


tpot2/builtin_modules/__init__.py

+1 -1

@@ -3,7 +3,7 @@
 from .column_one_hot_encoder import ColumnOneHotEncoder
 from .arithmetictransformer import ArithmeticTransformer
 from .arithmetictransformer import AddTransformer, mul_neg_1_Transformer, MulTransformer, SafeReciprocalTransformer, EQTransformer, NETransformer, GETransformer, GTTransformer, LETransformer, LTTransformer, MinTransformer, MaxTransformer, ZeroTransformer, OneTransformer, NTransformer
-from .passthrough import Passthrough
+from .passthrough import Passthrough, SkipTransformer
 from .imputer import ColumnSimpleImputer
 from .estimatortransformer import EstimatorTransformer
 from .passkbinsdiscretizer import PassKBinsDiscretizer

tpot2/builtin_modules/passthrough.py

+12 -0

@@ -1,4 +1,5 @@
 from sklearn.base import BaseEstimator, TransformerMixin
+import numpy as np
 
 class Passthrough(TransformerMixin,BaseEstimator):
 
@@ -7,3 +8,14 @@ def fit(self, X=None, y=None):
 
     def transform(self, X):
         return X
+
+
+class SkipTransformer(TransformerMixin,BaseEstimator):
+
+    def fit(self, X=None, y=None):
+        return self
+
+    def transform(self, X):
+        #empty array of same shape as X
+        return np.array([]).reshape(X.shape[0],0)
+

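The new SkipTransformer is the counterpart to Passthrough: instead of forwarding its input unchanged, its transform keeps the rows but drops every column. A minimal sketch of how it behaves, assuming the package is installed from this branch so the class is importable:

import numpy as np
from tpot2.builtin_modules import SkipTransformer

X = np.random.rand(5, 3)
skip = SkipTransformer()
# fit is a no-op; transform returns an array with the same number of rows and zero columns
print(skip.fit_transform(X).shape)  # (5, 0)

In a feature union this lets one branch contribute no features at all, effectively allowing the optimizer to switch a step off.
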
tpot2/config/get_configspace.py

+9 -4

@@ -27,7 +27,7 @@
 from tpot2.builtin_modules import AddTransformer, mul_neg_1_Transformer, MulTransformer, SafeReciprocalTransformer, EQTransformer, NETransformer, GETransformer, GTTransformer, LETransformer, LTTransformer, MinTransformer, MaxTransformer, ZeroTransformer, OneTransformer, NTransformer
 from tpot2.builtin_modules.genetic_encoders import DominantEncoder, RecessiveEncoder, HeterosisEncoder, UnderDominanceEncoder, OverDominanceEncoder
 from tpot2.builtin_modules import ZeroCount, ColumnOneHotEncoder, PassKBinsDiscretizer
-from tpot2.builtin_modules import Passthrough
+from tpot2.builtin_modules import Passthrough, SkipTransformer
 from sklearn.linear_model import SGDClassifier, LogisticRegression, SGDRegressor, Ridge, Lasso, ElasticNet, Lars, LassoLars, LassoLarsCV, RidgeCV, ElasticNetCV, PassiveAggressiveClassifier, ARDRegression
 from sklearn.ensemble import BaggingClassifier, RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier, ExtraTreesRegressor, ExtraTreesClassifier, AdaBoostRegressor, AdaBoostClassifier, GradientBoostingRegressor,RandomForestRegressor, BaggingRegressor, ExtraTreesRegressor, HistGradientBoostingClassifier, HistGradientBoostingRegressor
 from sklearn.neural_network import MLPClassifier, MLPRegressor

@@ -45,7 +45,7 @@
 from sklearn.feature_selection import f_classif, f_regression #TODO create a selectomixin using these?
 from sklearn.discriminant_analysis import LinearDiscriminantAnalysis, QuadraticDiscriminantAnalysis
 from sklearn.gaussian_process import GaussianProcessRegressor, GaussianProcessClassifier
-
+from sklearn.impute import SimpleImputer
 
 all_methods = [SGDClassifier, RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier, MLPClassifier, DecisionTreeClassifier, XGBClassifier, KNeighborsClassifier, SVC, LogisticRegression, LGBMClassifier, LinearSVC, GaussianNB, BernoulliNB, MultinomialNB, ExtraTreesRegressor, RandomForestRegressor, GradientBoostingRegressor, BaggingRegressor, DecisionTreeRegressor, KNeighborsRegressor, XGBRegressor, ZeroCount, ColumnOneHotEncoder, Binarizer, FastICA, FeatureAgglomeration, MaxAbsScaler, MinMaxScaler, Normalizer, Nystroem, PCA, PolynomialFeatures, RBFSampler, RobustScaler, StandardScaler, SelectFwe, SelectPercentile, VarianceThreshold, SGDRegressor, Ridge, Lasso, ElasticNet, Lars, LassoLars, LassoLarsCV, RidgeCV, SVR, LinearSVR, AdaBoostRegressor, GradientBoostingRegressor, RandomForestRegressor, BaggingRegressor, ExtraTreesRegressor, DecisionTreeRegressor, KNeighborsRegressor, ElasticNetCV,
                 AdaBoostClassifier,MLPRegressor,

@@ -54,8 +54,9 @@
                 PowerTransformer, QuantileTransformer,ARDRegression, QuadraticDiscriminantAnalysis, PassiveAggressiveClassifier, LinearDiscriminantAnalysis,
                 DominantEncoder, RecessiveEncoder, HeterosisEncoder, UnderDominanceEncoder, OverDominanceEncoder,
                 GaussianProcessClassifier, BaggingClassifier,LGBMRegressor,
-                Passthrough,
+                Passthrough,SkipTransformer,
                 PassKBinsDiscretizer,
+                SimpleImputer,
                 ]
 
 

@@ -123,7 +124,7 @@
     "all_transformers" : ["transformers", "scalers"],
 
     "arithmatic": ["AddTransformer", "mul_neg_1_Transformer", "MulTransformer", "SafeReciprocalTransformer", "EQTransformer", "NETransformer", "GETransformer", "GTTransformer", "LETransformer", "LTTransformer", "MinTransformer", "MaxTransformer"],
-    "imputers": [],
+    "imputers": ["SimpleImputer"],
     "skrebate": ["ReliefF", "SURF", "SURFstar", "MultiSURF"],
     "genetic_encoders": ["DominantEncoder", "RecessiveEncoder", "HeterosisEncoder", "UnderDominanceEncoder", "OverDominanceEncoder"],
 

@@ -135,6 +136,8 @@
 
 def get_configspace(name, n_classes=3, n_samples=1000, n_features=100, random_state=None):
     match name:
+        case "SimpleImputer":
+            return imputers.simple_imputer_cs
 
         #autoqtl_builtins.py
         case "FeatureEncodingFrequencySelector":

@@ -152,6 +155,8 @@ def get_configspace(name, n_classes=3, n_samples=1000, n_features=100, random_st
 
         case "Passthrough":
            return {}
+        case "SkipTransformer":
+            return {}
 
         #classifiers.py
         case "LinearDiscriminantAnalysis":

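A hedged sketch of what the new dispatch cases return (the import path below mirrors the file location above and is an assumption about how the module is exposed): "SimpleImputer" now resolves to the ConfigurationSpace defined in tpot2/config/imputers.py, while "Passthrough" and the new "SkipTransformer" have nothing to tune and return an empty dict.

from tpot2.config.get_configspace import get_configspace  # assumed import path

cs = get_configspace("SimpleImputer")       # ConfigSpace.ConfigurationSpace with strategy/add_indicator
print(cs.sample_configuration())            # e.g. strategy='median', add_indicator=False
print(get_configspace("SkipTransformer"))   # {} -- no hyperparameters
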
tpot2/config/imputers.py

+1 -1

@@ -1,7 +1,7 @@
 from ConfigSpace import ConfigurationSpace
 from ConfigSpace import ConfigurationSpace, Integer, Float, Categorical, Normal
 
-simple_imputer = ConfigurationSpace(
+simple_imputer_cs = ConfigurationSpace(
     space = {
         'strategy' : Categorical('strategy', ['mean','median', 'most_frequent', ]),
         'add_indicator' : Categorical('add_indicator', [True, False]),

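A minimal sketch of turning the renamed simple_imputer_cs into an estimator; it covers only the two hyperparameters defined above, and converting the sampled Configuration with dict() assumes a ConfigSpace version where Configuration behaves like a mapping.

from sklearn.impute import SimpleImputer
from tpot2.config.imputers import simple_imputer_cs

config = simple_imputer_cs.sample_configuration()
params = dict(config)              # e.g. {'strategy': 'mean', 'add_indicator': True}
imputer = SimpleImputer(**params)
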
tpot2/search_spaces/pipelines/sequential.py

+23 -4

@@ -12,9 +12,10 @@ class SequentialPipelineIndividual(SklearnIndividual):
     # takes in a list of search spaces. each space is a list of SklearnIndividualGenerators.
     # will produce a pipeline of Sequential length. Each step in the pipeline will correspond to the the search space provided in the same index.
 
-    def __init__(self, search_spaces : List[SklearnIndividualGenerator], rng=None) -> None:
+    def __init__(self, search_spaces : List[SklearnIndividualGenerator], memory=None, rng=None) -> None:
         super().__init__()
         self.search_spaces = search_spaces
+        self.memory = memory
         self.pipeline = []
 
         for space in self.search_spaces:

@@ -25,6 +26,14 @@ def __init__(self, search_spaces : List[SklearnIndividualGenerator], rng=None) -
     #TODO, mutate all steps or just one?
     def mutate(self, rng=None):
         rng = np.random.default_rng()
+
+        # mutated = False
+        # for step in self.pipeline:
+        #     if rng.random() < 0.5:
+        #         if step.mutate(rng):
+        #             mutated = True
+        # return mutated
+
         step = rng.choice(self.pipeline)
         return step.mutate(rng)
 

@@ -102,6 +111,15 @@ def _crossover_swap_segment(self, other, rng):
     def _crossover_inner_step(self, other, rng):
         rng = np.random.default_rng()
 
+        # crossover_success = False
+        # for idx in range(len(self.pipeline)):
+        #     if rng.random() < 0.5:
+        #         if self.pipeline[idx].crossover(other.pipeline[idx], rng):
+        #             crossover_success = True
+
+        # return crossover_success
+
+
         crossover_success = False
         for idx in range(len(self.pipeline)):
             if rng.random() < 0.5:

@@ -111,7 +129,7 @@ def _crossover_inner_step(self, other, rng):
         return crossover_success
 
     def export_pipeline(self):
-        return sklearn.pipeline.make_pipeline(*[step.export_pipeline() for step in self.pipeline])
+        return sklearn.pipeline.make_pipeline(*[step.export_pipeline() for step in self.pipeline], memory=self.memory)
 
     def unique_id(self):
         l = [step.unique_id() for step in self.pipeline]

@@ -122,12 +140,13 @@ def unique_id(self):
 
 
 class SequentialPipeline(SklearnIndividualGenerator):
-    def __init__(self, search_spaces : List[SklearnIndividualGenerator] ) -> None:
+    def __init__(self, search_spaces : List[SklearnIndividualGenerator], memory=None ) -> None:
         """
         Takes in a list of search spaces. will produce a pipeline of Sequential length. Each step in the pipeline will correspond to the the search space provided in the same index.
         """
 
         self.search_spaces = search_spaces
+        self.memory = memory
 
     def generate(self, rng=None):
-        return SequentialPipelineIndividual(self.search_spaces, rng=rng)
+        return SequentialPipelineIndividual(self.search_spaces, memory=self.memory, rng=rng)

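The new memory argument is simply forwarded to sklearn.pipeline.make_pipeline when an individual is exported. A short sketch of the underlying scikit-learn behavior (the cache directory name is a hypothetical example): with memory set, fitted transformers are cached with joblib and reused across repeated fits instead of being refit.

from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression

# memory may be a directory path or a joblib.Memory instance;
# transformer fits are cached there across repeated pipeline fits
pipe = make_pipeline(StandardScaler(), LogisticRegression(), memory="./.tpot2_cache")
pipe.fit([[0.0], [1.0], [2.0], [3.0]], [0, 0, 1, 1])
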
tpot2/search_spaces/pipelines/union.py

+1 -1

@@ -34,7 +34,7 @@ def _crossover(self, other, rng=None):
         #swap a random step in the pipeline with the corresponding step in the other pipeline
         rng = np.random.default_rng()
 
-        cx_funcs = [self._crossover_swap_random_steps, self._crossover_inner_step]
+        cx_funcs = [self._crossover_inner_step]
         rng.shuffle(cx_funcs)
         for cx_func in cx_funcs:
             if cx_func(other, rng):

0 commit comments
