
Commit 9db1e33

Merge pull request #145 from perib/dev
Dev

2 parents ed95419 + db614d8

13 files changed: +2,165 −30 lines

.github/workflows/docs.yml (+1)

@@ -22,6 +22,7 @@ jobs:
           pip install --upgrade pip
           pip install .
           pip install -r docs/requirements_docs.txt
+          pip install mkdocstrings[python] griffe

       # - name: Convert notebooks to HTML
       #   # if: ${{ github.event_name == 'push' && contains(github.event.head_commit.modified, 'Tutorial/') && contains(github.event.head_commit.modified, '.ipynb') }}
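The extra install step pulls in mkdocstrings' Python handler and griffe, presumably so the MkDocs build can render API-reference pages from docstrings on top of what docs/requirements_docs.txt already provides.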

Tutorial/amltk_search_space_parser_example.ipynb (+1,897)

Large diffs are not rendered by default.

setup.py (+2 −1)

@@ -53,7 +53,8 @@ def calculate_version():
     extras_require={
         'skrebate': ['skrebate>=0.3.4'],
         'mdr': ['scikit-mdr>=0.4.4'],
-        'sklearnex' : ['scikit-learn-intelex>=2023.2.1']
+        'sklearnex' : ['scikit-learn-intelex>=2023.2.1'],
+        'amltk' : ['amltk>=1.12.1'],
     },
     classifiers=[
         'Intended Audience :: Science/Research',
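With the new extra in place, the optional amltk dependency can be pulled in at install time with the usual extras syntax, e.g. pip install tpot2[amltk].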

tpot2/__init__.py (+1 −1)

@@ -8,9 +8,9 @@
 from .population import Population

 from . import builtin_modules
-from . import utils
 from . import config
 from . import search_spaces
+from . import utils
 from . import evolvers
 from . import objectives
 from . import selectors
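Reordering the import so that utils comes after search_spaces is presumably to avoid a circular import: tpot2.utils now tries to load the amltk parser (see tpot2/utils/__init__.py below), which in turn needs the search-space classes.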

tpot2/builtin_modules/feature_set_selector.py (+3)

@@ -92,6 +92,9 @@ def fit(self, X, y=None):

     # def transform(self, X):

+    def _get_tags(self):
+        tags = {"allow_nan": True, "requires_y": False}
+        return tags

     def _get_support_mask(self):
         """

tpot2/config/classifiers.py (+1 −1)

@@ -535,7 +535,7 @@ def MLPClassifier_hyperparameter_parser(params):
 def get_GaussianProcessClassifier_ConfigurationSpace(n_features, random_state):
     space = {
         'n_features': n_features,
-        'alpha': Float("alpha", bounds=(1e-14, 1.0), log=True),
+        'alpha': Float("alpha", bounds=(1e-10, 1.0), log=True),
         'thetaL': Float("thetaL", bounds=(1e-10, 1e-3), log=True),
         'thetaU': Float("thetaU", bounds=(1.0, 100000), log=True),
     }

tpot2/config/get_configspace.py (+16 −16)

@@ -406,12 +406,12 @@ def get_configspace(name, n_classes=3, n_samples=1000, n_features=100, random_st
         raise ValueError(f"Could not find configspace for {name}")


-def get_search_space(name, n_classes=3, n_samples=100, n_features=100, random_state=None, return_choice_pipeline=True):
+def get_search_space(name, n_classes=3, n_samples=100, n_features=100, random_state=None, return_choice_pipeline=True, base_node=EstimatorNode):


     #if list of names, return a list of EstimatorNodes
     if isinstance(name, list) or isinstance(name, np.ndarray):
-        search_spaces = [get_search_space(n, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state, return_choice_pipeline=False) for n in name]
+        search_spaces = [get_search_space(n, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state, return_choice_pipeline=False, base_node=base_node) for n in name]
         #remove Nones
         search_spaces = [s for s in search_spaces if s is not None]

@@ -422,12 +422,12 @@ def get_search_space(name, n_classes=3, n_samples=100, n_features=100, random_st

     if name in GROUPNAMES:
         name_list = GROUPNAMES[name]
-        return get_search_space(name_list, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state, return_choice_pipeline=return_choice_pipeline)
+        return get_search_space(name_list, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state, return_choice_pipeline=return_choice_pipeline, base_node=base_node)

-    return get_node(name, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state)
+    return get_node(name, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state, base_node=base_node)


-def get_node(name, n_classes=3, n_samples=100, n_features=100, random_state=None):
+def get_node(name, n_classes=3, n_samples=100, n_features=100, random_state=None, base_node=EstimatorNode):

     #these are wrappers that take in another estimator as a parameter
     # TODO Add AdaBoostRegressor, AdaBoostClassifier as wrappers? wrap a decision tree with different params?

@@ -461,39 +461,39 @@ def get_node(name, n_classes=3, n_samples=100, n_features=100, random_state=None
         return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=imputers.IterativeImputer_hyperparameter_parser)
     if name == "RobustScaler":
         configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state)
-        return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=transformers.robust_scaler_hyperparameter_parser)
+        return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=transformers.robust_scaler_hyperparameter_parser)
     if name == "GradientBoostingClassifier":
         configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state)
-        return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.GradientBoostingClassifier_hyperparameter_parser)
+        return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.GradientBoostingClassifier_hyperparameter_parser)
     if name == "HistGradientBoostingClassifier":
         configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state)
-        return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.HistGradientBoostingClassifier_hyperparameter_parser)
+        return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.HistGradientBoostingClassifier_hyperparameter_parser)
     if name == "GradientBoostingRegressor":
         configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state)
-        return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.GradientBoostingRegressor_hyperparameter_parser)
+        return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.GradientBoostingRegressor_hyperparameter_parser)
     if name == "HistGradientBoostingRegressor":
         configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state)
-        return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.HistGradientBoostingRegressor_hyperparameter_parser)
+        return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.HistGradientBoostingRegressor_hyperparameter_parser)
     if name == "MLPClassifier":
         configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state)
-        return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.MLPClassifier_hyperparameter_parser)
+        return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.MLPClassifier_hyperparameter_parser)
     if name == "MLPRegressor":
         configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state)
-        return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.MLPRegressor_hyperparameter_parser)
+        return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.MLPRegressor_hyperparameter_parser)
     if name == "GaussianProcessRegressor":
         configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state)
-        return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.GaussianProcessRegressor_hyperparameter_parser)
+        return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.GaussianProcessRegressor_hyperparameter_parser)
     if name == "GaussianProcessClassifier":
         configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state)
-        return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.GaussianProcessClassifier_hyperparameter_parser)
+        return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.GaussianProcessClassifier_hyperparameter_parser)
     if name == "FeatureAgglomeration":
         configspace = get_configspace(name, n_features=n_features)
-        return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=transformers.FeatureAgglomeration_hyperparameter_parser)
+        return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=transformers.FeatureAgglomeration_hyperparameter_parser)

     configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state)
     if configspace is None:
         #raise warning
         warnings.warn(f"Could not find configspace for {name}")
         return None

-    return EstimatorNode(STRING_TO_CLASS[name], configspace)
+    return base_node(STRING_TO_CLASS[name], configspace)
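A hedged sketch of what the new base_node hook enables: any factory with EstimatorNode's call signature can be swapped in, for example the experimental gradual node added later in this commit (the import paths below are assumed from this commit's layout):

from tpot2.config import get_search_space  # assumed re-export of get_configspace.get_search_space
from tpot2.search_spaces.nodes.estimator_node_gradual import EstimatorNode_gradual  # assumed module path

# every node in the returned search space is now built by EstimatorNode_gradual
# instead of the default EstimatorNode
space = get_search_space("RandomForestClassifier", n_classes=2,
                         base_node=EstimatorNode_gradual)
ind = space.generate(rng=0)
print(ind.export_pipeline())  # a RandomForestClassifier with sampled hyperparameters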

tpot2/config/regressors.py (+1 −1)

@@ -354,7 +354,7 @@ def get_ExtraTreesRegressor_ConfigurationSpace(random_state):
 def get_GaussianProcessRegressor_ConfigurationSpace(n_features, random_state):
     space = {
         'n_features': n_features,
-        'alpha': Float("alpha", bounds=(1e-14, 1.0), log=True),
+        'alpha': Float("alpha", bounds=(1e-10, 1.0), log=True),
         'thetaL': Float("thetaL", bounds=(1e-10, 1e-3), log=True),
         'thetaU': Float("thetaU", bounds=(1.0, 100000), log=True),
     }
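Both GaussianProcess configuration spaces get the same tightened floor for alpha. Because the hyperparameter is log-scaled, values are drawn log-uniformly, so raising the lower bound from 1e-14 to 1e-10 simply cuts off the numerically fragile tail; a small sketch with the public ConfigSpace API:

from ConfigSpace import ConfigurationSpace, Float

cs = ConfigurationSpace()
cs.add_hyperparameters([Float("alpha", bounds=(1e-10, 1.0), log=True)])

sample = cs.sample_configuration()
assert sample["alpha"] >= 1e-10  # the old bound allowed values down to 1e-14
print(sample["alpha"])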

tpot2/search_spaces/base.py (−8)

@@ -1,18 +1,10 @@
 import tpot2
-import numpy as np
-import pandas as pd
 import sklearn
-from tpot2 import config
-from typing import Generator, List, Tuple, Union
-import random
 from sklearn.base import BaseEstimator
 import sklearn
 import networkx as nx
 from . import graph_utils
 from typing import final
-from abc import ABC, abstractmethod
-
-

tpot2/search_spaces/nodes/estimator_node_gradual.py (+146, new file)
@@ -0,0 +1,146 @@
+# try https://automl.github.io/ConfigSpace/main/api/hyperparameters.html
+
+import numpy as np
+from tpot2.search_spaces.base import SklearnIndividual, SklearnIndividualGenerator
+from ConfigSpace import ConfigurationSpace
+from typing import final
+import ConfigSpace
+
+
+NONE_SPECIAL_STRING = "<NONE>"
+TRUE_SPECIAL_STRING = "<TRUE>"
+FALSE_SPECIAL_STRING = "<FALSE>"
+
+
+def default_hyperparameter_parser(params:dict) -> dict:
+    return params
+
+
+# NOTE: This is not the default, currently experimental
+class EstimatorNodeIndividual_gradual(SklearnIndividual):
+    """
+    Note that ConfigurationSpace does not support None as a parameter. Instead, use the special string "<NONE>". TPOT will automatically replace instances of this string with the Python None.
+
+    Parameters
+    ----------
+    method : type
+        The class of the estimator to be used
+
+    space : ConfigurationSpace|dict
+        The hyperparameter space to be used. If a dict is passed, hyperparameters are fixed and not learned.
+
+    """
+    def __init__(self, method: type,
+                 space: ConfigurationSpace|dict, #TODO If a dict is passed, hyperparameters are fixed and not learned. Is this confusing? Should we make a second node type?
+                 hyperparameter_parser: callable = None,
+                 rng=None) -> None:
+        super().__init__()
+        self.method = method
+        self.space = space
+
+        if hyperparameter_parser is None:
+            self.hyperparameter_parser = default_hyperparameter_parser
+        else:
+            self.hyperparameter_parser = hyperparameter_parser
+
+        if isinstance(space, dict):
+            self.hyperparameters = space
+        else:
+            rng = np.random.default_rng(rng)
+            self.space.seed(rng.integers(0, 2**32))
+            self.hyperparameters = dict(self.space.sample_configuration())
+
+        self.check_hyperparameters_for_None()
+
+    def mutate(self, rng=None):
+        if isinstance(self.space, dict):
+            return False
+        self.hyperparameters = gradual_hyperparameter_update(params=self.hyperparameters, configspace=self.space, rng=rng)
+        self.check_hyperparameters_for_None()
+        return True
+
+    def crossover(self, other, rng=None):
+        if isinstance(self.space, dict):
+            return False
+
+        rng = np.random.default_rng(rng)
+        if self.method != other.method:
+            return False
+
+        #loop through hyperparameters, randomly swap items in self.hyperparameters with items in other.hyperparameters
+        for hyperparameter in self.space:
+            if rng.choice([True, False]):
+                if hyperparameter in other.hyperparameters:
+                    self.hyperparameters[hyperparameter] = other.hyperparameters[hyperparameter]
+
+        self.check_hyperparameters_for_None()
+
+        return True
+
+    def check_hyperparameters_for_None(self):
+        for key, value in self.hyperparameters.items():
+            #if string
+            if isinstance(value, str):
+                if value == NONE_SPECIAL_STRING:
+                    self.hyperparameters[key] = None
+                elif value == TRUE_SPECIAL_STRING:
+                    self.hyperparameters[key] = True
+                elif value == FALSE_SPECIAL_STRING:
+                    self.hyperparameters[key] = False
+
+    @final #this method should not be overridden, instead override hyperparameter_parser
+    def export_pipeline(self, **kwargs):
+        return self.method(**self.hyperparameter_parser(self.hyperparameters))
+
+    def unique_id(self):
+        #return a dictionary of the method and the hyperparameters
+        method_str = self.method.__name__
+        params = list(self.hyperparameters.keys())
+        params = sorted(params)
+
+        id_str = f"{method_str}({', '.join([f'{param}={self.hyperparameters[param]}' for param in params])})"
+
+        return id_str
+
+def gradual_hyperparameter_update(params:dict, configspace:ConfigurationSpace, rng=None):
+    rng = np.random.default_rng(rng)
+    configspace.seed(rng.integers(0, 2**32))
+    new_params = dict(configspace.sample_configuration())
+    for param in list(new_params.keys()):
+        #if parameter is float, multiply by normal distribution
+        if param not in params:
+            continue
+        try:
+            if issubclass(type(configspace[param]), ConfigSpace.hyperparameters.hyperparameter.FloatHyperparameter):
+
+                if configspace[param].log:
+                    new_params[param] = params[param] * rng.lognormal(0, 1)
+                else:
+                    new_params[param] = params[param] + rng.normal(0, .1) * (configspace[param].upper - configspace[param].lower)
+                # if check if above or below min and cap
+                if new_params[param] < configspace[param].lower:
+                    new_params[param] = configspace[param].lower
+                elif new_params[param] > configspace[param].upper:
+                    new_params[param] = configspace[param].upper
+            #if parameter is integer, add normal distribution
+            elif issubclass(type(configspace[param]), ConfigSpace.hyperparameters.hyperparameter.IntegerHyperparameter):
+                new_params[param] = params[param] * np.random.normal(0, 1)
+                # if check if above or below min and cap
+                if new_params[param] < configspace[param].lower:
+                    new_params[param] = configspace[param].lower
+                elif new_params[param] > configspace[param].upper:
+                    new_params[param] = configspace[param].upper
+                new_params[param] = int(new_params[param])
+        except:
+            pass
+
+    return new_params
+
+class EstimatorNode_gradual(SklearnIndividualGenerator):
+    def __init__(self, method, space, hyperparameter_parser=default_hyperparameter_parser):
+        self.method = method
+        self.space = space
+        self.hyperparameter_parser = hyperparameter_parser
+
+    def generate(self, rng=None):
+        return EstimatorNodeIndividual_gradual(self.method, self.space, hyperparameter_parser=self.hyperparameter_parser, rng=rng)
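A hedged usage sketch of the new experimental node: unlike a fresh resample, mutation nudges floats multiplicatively (log-scale) or by a small additive normal step, capped at the bounds. The module path is an assumption based on this commit's layout:

from ConfigSpace import ConfigurationSpace, Float, Integer
from sklearn.tree import DecisionTreeClassifier
from tpot2.search_spaces.nodes.estimator_node_gradual import EstimatorNode_gradual  # assumed path

space = ConfigurationSpace()
space.add_hyperparameters([
    Integer("max_depth", bounds=(1, 20)),
    Float("min_weight_fraction_leaf", bounds=(0.0, 0.5)),
])

node = EstimatorNode_gradual(DecisionTreeClassifier, space)
ind = node.generate(rng=0)
before = dict(ind.hyperparameters)
ind.mutate(rng=1)  # gradual_hyperparameter_update drifts each value, capped at its bounds
print(before, "->", ind.hyperparameters)
print(ind.export_pipeline())  # DecisionTreeClassifier built from the mutated values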

tpot2/search_spaces/nodes/genetic_feature_selection.py (+15 −1)

@@ -15,15 +15,29 @@
 class MaskSelector(BaseEstimator, SelectorMixin):
     """Select predefined feature subsets."""

-    def __init__(self, mask):
+    def __init__(self, mask, set_output_transform=None):
         self.mask = mask
+        self.set_output_transform = set_output_transform
+        if set_output_transform is not None:
+            self.set_output(transform=set_output_transform)

     def fit(self, X, y=None):
+        self.n_features_in_ = X.shape[1]
+        if isinstance(X, pd.DataFrame):
+            self.feature_names_in_ = X.columns
+        # self.set_output(transform="pandas")
+        self.is_fitted_ = True #so sklearn knows it's fitted
         return self

+    def _get_tags(self):
+        tags = {"allow_nan": True, "requires_y": False}
+        return tags
+
     def _get_support_mask(self):
         return np.array(self.mask)

+    def get_feature_names_out(self, input_features=None):
+        return self.feature_names_in_[self.get_support()]

 class GeneticFeatureSelectorIndividual(SklearnIndividual):
     def __init__( self,
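A quick sketch of the updated MaskSelector on a DataFrame; this relies on scikit-learn's set_output API (>= 1.2), and the import path is assumed from this file:

import pandas as pd
from tpot2.search_spaces.nodes.genetic_feature_selection import MaskSelector  # assumed path

X = pd.DataFrame({"a": [1, 2], "b": [3, 4], "c": [5, 6]})
sel = MaskSelector(mask=[True, False, True], set_output_transform="pandas")

out = sel.fit_transform(X)                # a DataFrame, thanks to set_output
print(list(sel.get_feature_names_out()))  # ['a', 'c'] via feature_names_in_
print(out.columns.tolist())               # ['a', 'c']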

tpot2/utils/__init__.py (+10 −1)

@@ -1,2 +1,11 @@
 from . import eval_utils
-from .utils import *
+from .utils import *
+
+# If amltk is installed, import the parser
+try:
+    from .amltk_parser import tpot2_parser
+except ImportError:
+    # Handle the case when amltk is not installed
+    pass
+    # print("amltk is not installed. Please install it to use tpot2_parser.")
+    # Optional: raise an exception or provide alternative functionality
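Downstream code can then feature-detect the optional parser instead of hard-requiring the amltk extra; a minimal sketch:

# tpot2_parser is only re-exported from tpot2.utils when amltk is installed
try:
    from tpot2.utils import tpot2_parser
except ImportError:
    tpot2_parser = None

if tpot2_parser is None:
    print("amltk extra not installed; skipping amltk -> TPOT2 search space conversion")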
