Dev #145

Merged: 16 commits, Sep 11, 2024
1 change: 1 addition & 0 deletions .github/workflows/docs.yml
@@ -22,6 +22,7 @@ jobs:
pip install --upgrade pip
pip install .
pip install -r docs/requirements_docs.txt
+pip install mkdocstrings[python] griffe

# - name: Convert notebooks to HTML
# # if: ${{ github.event_name == 'push' && contains(github.event.head_commit.modified, 'Tutorial/') && contains(github.event.head_commit.modified, '.ipynb') }}
1,897 changes: 1,897 additions & 0 deletions Tutorial/amltk_search_space_parser_example.ipynb

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion setup.py
@@ -53,7 +53,8 @@ def calculate_version():
extras_require={
'skrebate': ['skrebate>=0.3.4'],
'mdr': ['scikit-mdr>=0.4.4'],
-'sklearnex' : ['scikit-learn-intelex>=2023.2.1']
+'sklearnex' : ['scikit-learn-intelex>=2023.2.1'],
+'amltk' : ['amltk>=1.12.1'],
},
classifiers=[
'Intended Audience :: Science/Research',
2 changes: 1 addition & 1 deletion tpot2/__init__.py
@@ -8,9 +8,9 @@
from .population import Population

from . import builtin_modules
-from . import utils
from . import config
from . import search_spaces
+from . import utils
from . import evolvers
from . import objectives
from . import selectors
3 changes: 3 additions & 0 deletions tpot2/builtin_modules/feature_set_selector.py
@@ -92,6 +92,9 @@ def fit(self, X, y=None):

# def transform(self, X):

+def _get_tags(self):
+    tags = {"allow_nan": True, "requires_y": False}
+    return tags

def _get_support_mask(self):
"""
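For context: _get_tags is scikit-learn's (pre-1.6) estimator-tags hook. Returning allow_nan=True tells validation utilities that this selector tolerates NaN inputs, and requires_y=False that it can fit without a target. A minimal standalone sketch of the mechanism (illustrative only; not part of this PR):

import numpy as np
from sklearn.base import BaseEstimator

class NaNTolerantEstimator(BaseEstimator):
    # Same tags the PR sets on FeatureSetSelector.
    def _get_tags(self):
        return {"allow_nan": True, "requires_y": False}

    def fit(self, X, y=None):
        # With allow_nan set, sklearn's common checks are expected to skip
        # the finiteness validation for this estimator.
        self.n_features_in_ = np.asarray(X).shape[1]
        return self

est = NaNTolerantEstimator().fit([[1.0, float("nan")], [2.0, 3.0]])
print(est._get_tags()["allow_nan"])  # True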
2 changes: 1 addition & 1 deletion tpot2/config/classifiers.py
@@ -535,7 +535,7 @@ def MLPClassifier_hyperparameter_parser(params):
def get_GaussianProcessClassifier_ConfigurationSpace(n_features, random_state):
space = {
'n_features': n_features,
'alpha': Float("alpha", bounds=(1e-14, 1.0), log=True),
'alpha': Float("alpha", bounds=(1e-10, 1.0), log=True),
'thetaL': Float("thetaL", bounds=(1e-10, 1e-3), log=True),
'thetaU': Float("thetaU", bounds=(1.0, 100000), log=True),
}
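The alpha lower bound is raised from 1e-14 to 1e-10, matching thetaL's floor and presumably avoiding numerically negligible draws; the same change recurs in tpot2/config/regressors.py below. A small sketch of sampling from such a log-scaled hyperparameter (illustrative; mirrors the config style above, assuming a recent ConfigSpace):

from ConfigSpace import ConfigurationSpace, Float

cs = ConfigurationSpace(space={"alpha": Float("alpha", bounds=(1e-10, 1.0), log=True)})
cs.seed(0)
print(cs.sample_configuration())  # alpha drawn log-uniformly from [1e-10, 1.0]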
32 changes: 16 additions & 16 deletions tpot2/config/get_configspace.py
@@ -406,12 +406,12 @@ def get_configspace(name, n_classes=3, n_samples=1000, n_features=100, random_st
raise ValueError(f"Could not find configspace for {name}")


-def get_search_space(name, n_classes=3, n_samples=100, n_features=100, random_state=None, return_choice_pipeline=True):
+def get_search_space(name, n_classes=3, n_samples=100, n_features=100, random_state=None, return_choice_pipeline=True, base_node=EstimatorNode):


#if list of names, return a list of EstimatorNodes
if isinstance(name, list) or isinstance(name, np.ndarray):
-search_spaces = [get_search_space(n, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state, return_choice_pipeline=False) for n in name]
+search_spaces = [get_search_space(n, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state, return_choice_pipeline=False, base_node=base_node) for n in name]
#remove Nones
search_spaces = [s for s in search_spaces if s is not None]

@@ -422,12 +422,12 @@ def get_search_space(name, n_classes=3, n_samples=100, n_features=100, random_st

if name in GROUPNAMES:
name_list = GROUPNAMES[name]
-return get_search_space(name_list, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state, return_choice_pipeline=return_choice_pipeline)
+return get_search_space(name_list, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state, return_choice_pipeline=return_choice_pipeline, base_node=base_node)

-return get_node(name, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state)
+return get_node(name, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state, base_node=base_node)


-def get_node(name, n_classes=3, n_samples=100, n_features=100, random_state=None):
+def get_node(name, n_classes=3, n_samples=100, n_features=100, random_state=None, base_node=EstimatorNode):

#these are wrappers that take in another estimator as a parameter
# TODO Add AdaBoostRegressor, AdaBoostClassifier as wrappers? wrap a decision tree with different params?
@@ -461,39 +461,39 @@ def get_node(name, n_classes=3, n_samples=100, n_features=100, random_state=None
return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=imputers.IterativeImputer_hyperparameter_parser)
if name == "RobustScaler":
configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state)
-return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=transformers.robust_scaler_hyperparameter_parser)
+return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=transformers.robust_scaler_hyperparameter_parser)
if name == "GradientBoostingClassifier":
configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state)
-return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.GradientBoostingClassifier_hyperparameter_parser)
+return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.GradientBoostingClassifier_hyperparameter_parser)
if name == "HistGradientBoostingClassifier":
configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state)
-return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.HistGradientBoostingClassifier_hyperparameter_parser)
+return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.HistGradientBoostingClassifier_hyperparameter_parser)
if name == "GradientBoostingRegressor":
configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state)
-return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.GradientBoostingRegressor_hyperparameter_parser)
+return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.GradientBoostingRegressor_hyperparameter_parser)
if name == "HistGradientBoostingRegressor":
configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state)
-return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.HistGradientBoostingRegressor_hyperparameter_parser)
+return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.HistGradientBoostingRegressor_hyperparameter_parser)
if name == "MLPClassifier":
configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state)
-return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.MLPClassifier_hyperparameter_parser)
+return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.MLPClassifier_hyperparameter_parser)
if name == "MLPRegressor":
configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state)
-return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.MLPRegressor_hyperparameter_parser)
+return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.MLPRegressor_hyperparameter_parser)
if name == "GaussianProcessRegressor":
configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state)
-return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.GaussianProcessRegressor_hyperparameter_parser)
+return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=regressors.GaussianProcessRegressor_hyperparameter_parser)
if name == "GaussianProcessClassifier":
configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, random_state=random_state)
-return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.GaussianProcessClassifier_hyperparameter_parser)
+return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=classifiers.GaussianProcessClassifier_hyperparameter_parser)
if name == "FeatureAgglomeration":
configspace = get_configspace(name, n_features=n_features)
-return EstimatorNode(STRING_TO_CLASS[name], configspace, hyperparameter_parser=transformers.FeatureAgglomeration_hyperparameter_parser)
+return base_node(STRING_TO_CLASS[name], configspace, hyperparameter_parser=transformers.FeatureAgglomeration_hyperparameter_parser)

configspace = get_configspace(name, n_classes=n_classes, n_samples=n_samples, n_features=n_features, random_state=random_state)
if configspace is None:
#raise warning
warnings.warn(f"Could not find configspace for {name}")
return None

-return EstimatorNode(STRING_TO_CLASS[name], configspace)
+return base_node(STRING_TO_CLASS[name], configspace)
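The effect of the new base_node parameter: every leaf that previously hard-coded EstimatorNode can now be wrapped in an alternative node class, such as the experimental gradual node added below in estimator_node_gradual.py. A hedged usage sketch (import paths assumed, and "RandomForestClassifier" assumed to be a registered name in STRING_TO_CLASS):

from tpot2.config import get_search_space
from tpot2.search_spaces.nodes.estimator_node_gradual import EstimatorNode_gradual

# Default behavior: each estimator's config space is wrapped in an EstimatorNode.
default_space = get_search_space("RandomForestClassifier")

# Same config space, wrapped in the experimental gradual node instead.
gradual_space = get_search_space("RandomForestClassifier", base_node=EstimatorNode_gradual)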
2 changes: 1 addition & 1 deletion tpot2/config/regressors.py
@@ -354,7 +354,7 @@ def get_ExtraTreesRegressor_ConfigurationSpace(random_state):
def get_GaussianProcessRegressor_ConfigurationSpace(n_features, random_state):
space = {
'n_features': n_features,
'alpha': Float("alpha", bounds=(1e-14, 1.0), log=True),
'alpha': Float("alpha", bounds=(1e-10, 1.0), log=True),
'thetaL': Float("thetaL", bounds=(1e-10, 1e-3), log=True),
'thetaU': Float("thetaU", bounds=(1.0, 100000), log=True),
}
8 changes: 0 additions & 8 deletions tpot2/search_spaces/base.py
@@ -1,18 +1,10 @@
-import tpot2
 import numpy as np
-import pandas as pd
-import sklearn
-from tpot2 import config
 from typing import Generator, List, Tuple, Union
-import random
 from sklearn.base import BaseEstimator
-import sklearn
-import networkx as nx
-from . import graph_utils
 from typing import final
 from abc import ABC, abstractmethod





146 changes: 146 additions & 0 deletions tpot2/search_spaces/nodes/estimator_node_gradual.py
@@ -0,0 +1,146 @@
# try https://automl.github.io/ConfigSpace/main/api/hyperparameters.html

import numpy as np
from tpot2.search_spaces.base import SklearnIndividual, SklearnIndividualGenerator
from ConfigSpace import ConfigurationSpace
from typing import final
import ConfigSpace


NONE_SPECIAL_STRING = "<NONE>"
TRUE_SPECIAL_STRING = "<TRUE>"
FALSE_SPECIAL_STRING = "<FALSE>"


def default_hyperparameter_parser(params: dict) -> dict:
    return params


# NOTE: This is not the default; it is currently experimental.
class EstimatorNodeIndividual_gradual(SklearnIndividual):
    """
    Note that ConfigurationSpace does not support None as a parameter value. Instead, use the special string "<NONE>"; TPOT automatically replaces instances of this string with Python's None.

    Parameters
    ----------
    method : type
        The class of the estimator to be used.

    space : ConfigurationSpace|dict
        The hyperparameter space to be used. If a dict is passed, hyperparameters are fixed and not learned.
    """
    def __init__(self, method: type,
                 space: ConfigurationSpace|dict, #TODO If a dict is passed, hyperparameters are fixed and not learned. Is this confusing? Should we make a second node type?
                 hyperparameter_parser: callable = None,
                 rng=None) -> None:
        super().__init__()
        self.method = method
        self.space = space

        if hyperparameter_parser is None:
            self.hyperparameter_parser = default_hyperparameter_parser
        else:
            self.hyperparameter_parser = hyperparameter_parser

        if isinstance(space, dict):
            self.hyperparameters = space
        else:
            rng = np.random.default_rng(rng)
            self.space.seed(rng.integers(0, 2**32))
            self.hyperparameters = dict(self.space.sample_configuration())

        self.check_hyperparameters_for_None()

    def mutate(self, rng=None):
        if isinstance(self.space, dict):
            return False
        self.hyperparameters = gradual_hyperparameter_update(params=self.hyperparameters, configspace=self.space, rng=rng)
        self.check_hyperparameters_for_None()
        return True

    def crossover(self, other, rng=None):
        if isinstance(self.space, dict):
            return False

        rng = np.random.default_rng(rng)
        if self.method != other.method:
            return False

        # loop through hyperparameters, randomly swapping items in self.hyperparameters with items in other.hyperparameters
        for hyperparameter in self.space:
            if rng.choice([True, False]):
                if hyperparameter in other.hyperparameters:
                    self.hyperparameters[hyperparameter] = other.hyperparameters[hyperparameter]

        self.check_hyperparameters_for_None()

        return True

    def check_hyperparameters_for_None(self):
        for key, value in self.hyperparameters.items():
            # replace the special strings with their Python values
            if isinstance(value, str):
                if value == NONE_SPECIAL_STRING:
                    self.hyperparameters[key] = None
                elif value == TRUE_SPECIAL_STRING:
                    self.hyperparameters[key] = True
                elif value == FALSE_SPECIAL_STRING:
                    self.hyperparameters[key] = False

    @final #this method should not be overridden; instead override hyperparameter_parser
    def export_pipeline(self, **kwargs):
        return self.method(**self.hyperparameter_parser(self.hyperparameters))

    def unique_id(self):
        # return a string encoding the method and the hyperparameters
        method_str = self.method.__name__
        params = sorted(self.hyperparameters.keys())

        id_str = f"{method_str}({', '.join([f'{param}={self.hyperparameters[param]}' for param in params])})"

        return id_str


def gradual_hyperparameter_update(params: dict, configspace: ConfigurationSpace, rng=None):
    rng = np.random.default_rng(rng)
    configspace.seed(rng.integers(0, 2**32))
    new_params = dict(configspace.sample_configuration())
    for param in list(new_params.keys()):
        if param not in params:
            continue
        try:
            # if the parameter is a float, nudge it: multiply by a lognormal draw for log-scale params, otherwise add Gaussian noise scaled to the parameter's range
            if issubclass(type(configspace[param]), ConfigSpace.hyperparameters.hyperparameter.FloatHyperparameter):
                if configspace[param].log:
                    new_params[param] = params[param] * rng.lognormal(0, 1)
                else:
                    new_params[param] = params[param] + rng.normal(0, .1) * (configspace[param].upper - configspace[param].lower)
                # cap at the configspace bounds
                if new_params[param] < configspace[param].lower:
                    new_params[param] = configspace[param].lower
                elif new_params[param] > configspace[param].upper:
                    new_params[param] = configspace[param].upper
            # if the parameter is an integer, add scaled Gaussian noise as well, then round
            elif issubclass(type(configspace[param]), ConfigSpace.hyperparameters.hyperparameter.IntegerHyperparameter):
                new_params[param] = params[param] + rng.normal(0, .1) * (configspace[param].upper - configspace[param].lower)
                # cap at the configspace bounds
                if new_params[param] < configspace[param].lower:
                    new_params[param] = configspace[param].lower
                elif new_params[param] > configspace[param].upper:
                    new_params[param] = configspace[param].upper
                new_params[param] = int(new_params[param])
        except Exception:
            pass

    return new_params


class EstimatorNode_gradual(SklearnIndividualGenerator):
    def __init__(self, method, space, hyperparameter_parser=default_hyperparameter_parser):
        self.method = method
        self.space = space
        self.hyperparameter_parser = hyperparameter_parser

    def generate(self, rng=None):
        return EstimatorNodeIndividual_gradual(self.method, self.space, hyperparameter_parser=self.hyperparameter_parser, rng=rng)
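A quick usage sketch of the new node type (illustrative; the estimator, space, and import path are assumptions, not part of the diff). Unlike the standard mutate, the gradual update perturbs each hyperparameter around its current value rather than resampling it from scratch:

from ConfigSpace import ConfigurationSpace, Float
from sklearn.linear_model import LogisticRegression
from tpot2.search_spaces.nodes.estimator_node_gradual import EstimatorNode_gradual

space = ConfigurationSpace(space={"C": Float("C", bounds=(1e-3, 1e3), log=True)})

node = EstimatorNode_gradual(LogisticRegression, space)
ind = node.generate(rng=0)       # samples an initial C from the space
ind.mutate(rng=1)                # gradual update: log-scale C is multiplied by a lognormal draw
est = ind.export_pipeline()      # a ready-to-fit LogisticRegression instance
print(ind.unique_id())           # e.g. "LogisticRegression(C=...)"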
16 changes: 15 additions & 1 deletion tpot2/search_spaces/nodes/genetic_feature_selection.py
@@ -15,15 +15,29 @@
class MaskSelector(BaseEstimator, SelectorMixin):
"""Select predefined feature subsets."""

-    def __init__(self, mask):
+    def __init__(self, mask, set_output_transform=None):
         self.mask = mask
+        self.set_output_transform = set_output_transform
+        if set_output_transform is not None:
+            self.set_output(transform=set_output_transform)

     def fit(self, X, y=None):
+        self.n_features_in_ = X.shape[1]
+        if isinstance(X, pd.DataFrame):
+            self.feature_names_in_ = X.columns
+            # self.set_output(transform="pandas")
+        self.is_fitted_ = True #so sklearn knows it's fitted
         return self

+    def _get_tags(self):
+        tags = {"allow_nan": True, "requires_y": False}
+        return tags
+
     def _get_support_mask(self):
         return np.array(self.mask)

+    def get_feature_names_out(self, input_features=None):
+        return self.feature_names_in_[self.get_support()]

class GeneticFeatureSelectorIndividual(SklearnIndividual):
def __init__( self,
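What the MaskSelector additions enable, as a hedged sketch (assumes the import path shown in the diff and scikit-learn >= 1.2 for set_output): fitting on a DataFrame records feature names, and the selector can emit pandas output restricted to the selected columns.

import numpy as np
import pandas as pd
from tpot2.search_spaces.nodes.genetic_feature_selection import MaskSelector

X = pd.DataFrame({"a": [1.0, 2.0], "b": [3.0, 4.0], "c": [5.0, 6.0]})
sel = MaskSelector(mask=np.array([True, False, True]), set_output_transform="pandas")
sel.fit(X)
print(list(sel.get_feature_names_out()))   # ['a', 'c']
print(sel.transform(X).columns.tolist())   # ['a', 'c'] — a DataFrame, via set_output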
11 changes: 10 additions & 1 deletion tpot2/utils/__init__.py
@@ -1,2 +1,11 @@
 from . import eval_utils
-from .utils import *
+from .utils import *
+
+# If amltk is installed, import the parser
+try:
+    from .amltk_parser import tpot2_parser
+except ImportError:
+    # Handle the case when amltk is not installed
+    pass
+    # print("amltk is not installed. Please install it to use tpot2_parser.")
+    # Optional: raise an exception or provide alternative functionality
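Together with the new 'amltk' extra in setup.py, this makes amltk support opt-in: pip install tpot2[amltk] pulls in the dependency, after which tpot2.utils exposes tpot2_parser; otherwise the name is simply absent. A small consumer-side sketch (assumes only what the diff shows):

import tpot2

if hasattr(tpot2.utils, "tpot2_parser"):
    print("amltk support available: tpot2.utils.tpot2_parser")
else:
    print("amltk not installed; `pip install tpot2[amltk]` enables the parser")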