added unique_id, fixed issue where graphpipeline not correctly identi… #127

Merged
merged 1 commit on Apr 19, 2024
1,101 changes: 532 additions & 569 deletions Tutorial/2_Search_Spaces.ipynb

Large diffs are not rendered by default.

8 changes: 6 additions & 2 deletions tpot2/config/classifiers.py
@@ -307,7 +307,7 @@ def get_LinearDiscriminantAnalysis_ConfigurationSpace():

 #### Gradient Boosting Classifiers

-def get_GradientBoostingClassifier_ConfigurationSpace(n_features, random_state):
+def get_GradientBoostingClassifier_ConfigurationSpace(n_classes, n_features, random_state):
     early_stop = Categorical("early_stop", ["off", "valid", "train"])
     n_iter_no_change = Integer("n_iter_no_change",bounds=(1,20))
     validation_fraction = Float("validation_fraction", bounds=(0.01, 0.4))
@@ -316,7 +316,6 @@ def get_GradientBoostingClassifier_ConfigurationSpace(n_features, random_state):
     validation_fraction_cond = EqualsCondition(validation_fraction, early_stop, "valid")

     space = {
-        'loss': Categorical("loss", ['log_loss', 'exponential']),
         'learning_rate': Float("learning_rate", bounds=(1e-3, 1), log=True),
         'min_samples_leaf': Integer("min_samples_leaf", bounds=(1, 200)),
         'min_samples_split': Integer("min_samples_split", bounds=(2, 20)),
@@ -327,6 +326,11 @@ def get_GradientBoostingClassifier_ConfigurationSpace(n_features, random_state):
         'tol': 1e-4,
     }

+    if n_classes == 2:
+        space['loss'] = Categorical("loss", ['log_loss', 'exponential'])
+    else:
+        space['loss'] = "log_loss"
+
     if random_state is not None: #This is required because configspace doesn't allow None as a value
         space['random_state'] = random_state
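Note: sklearn's "exponential" loss (the AdaBoost-style loss) supports only binary targets, which is why the config space above now keys the loss choice on n_classes. A minimal sketch of the failure the old space could sample, using a toy dataset (the exact error message varies by sklearn version):

from sklearn.datasets import make_classification
from sklearn.ensemble import GradientBoostingClassifier

# 3-class toy problem; loss="exponential" is binary-only, so fit() raises
X, y = make_classification(n_samples=100, n_classes=3, n_informative=4, random_state=0)
try:
    GradientBoostingClassifier(loss="exponential").fit(X, y)
except ValueError as err:
    print("rejected:", err)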
2 changes: 1 addition & 1 deletion tpot2/config/get_configspace.py
@@ -223,7 +223,7 @@ def get_configspace(name, n_classes=3, n_samples=100, n_features=100, random_state
         case "RandomForestClassifier":
             return classifiers.get_RandomForestClassifier_ConfigurationSpace(n_features=n_features, random_state=random_state)
         case "GradientBoostingClassifier":
-            return classifiers.get_GradientBoostingClassifier_ConfigurationSpace(n_features=n_features, random_state=random_state)
+            return classifiers.get_GradientBoostingClassifier_ConfigurationSpace(n_classes=n_classes, n_features=n_features, random_state=random_state)
         case "HistGradientBoostingClassifier":
             return classifiers.get_HistGradientBoostingClassifier_ConfigurationSpace(n_features=n_features, random_state=random_state)
         case "XGBClassifier":
6 changes: 4 additions & 2 deletions tpot2/graphsklearn.py
@@ -11,6 +11,8 @@
 from sklearn.utils.validation import check_memory
 from sklearn.preprocessing import LabelEncoder

+from sklearn.base import is_classifier, is_regressor
+
 #labels - str
 #attributes - "instance" -> instance of the type

@@ -128,7 +130,7 @@ def fit_sklearn_digraph(graph: nx.DiGraph,
             # instance.fit(this_X, y)


-            if issubclass(type(instance), sklearn.base.RegressorMixin) or issubclass(type(instance), sklearn.base.ClassifierMixin):
+            if is_classifier(instance) or is_regressor(instance):
                 transformed, instance = estimator_fit_transform_override_cross_val_predict_cached(instance, this_X, y, cv=cross_val_predict_cv, method=method)
             else:
                 transformed, instance = fit_transform_one_cached(instance, this_X, y)#instance.fit_transform(this_X,y)
@@ -168,7 +170,7 @@ def transform_sklearn_digraph(graph: nx.DiGraph,
         else:
             this_X = np.hstack([transformed_steps[child] for child in get_ordered_successors(graph, node)])

-        if issubclass(type(instance), sklearn.base.RegressorMixin) or issubclass(type(instance), sklearn.base.ClassifierMixin):
+        if is_classifier(instance) or is_regressor(instance):
             this_method = _method_name(instance.__class__.__name__, instance, method)
             transformed = getattr(instance, this_method)(this_X)
         else:
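Note: is_classifier and is_regressor are the public sklearn.base helpers for this test; they check what the estimator declares itself to be rather than its mixin ancestry, so they also cover third-party estimators that don't subclass ClassifierMixin/RegressorMixin. A quick illustration:

from sklearn.base import is_classifier, is_regressor
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.preprocessing import StandardScaler

print(is_classifier(LogisticRegression()))  # True
print(is_regressor(LinearRegression()))     # True
# transformers are neither, so they take the fit_transform branch above
print(is_classifier(StandardScaler()), is_regressor(StandardScaler()))  # False False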
4 changes: 0 additions & 4 deletions tpot2/search_spaces/base.py
@@ -40,10 +40,6 @@ def generate(self, rng=None) -> SklearnIndividual:
         pass


-
-
-
-
 def flatten_graphpipeline(est):
     flattened_full_graph = est.graph.copy()

2 changes: 1 addition & 1 deletion tpot2/search_spaces/nodes/estimator_node.py
@@ -95,7 +95,7 @@ def export_pipeline(self, **kwargs):

     def unique_id(self):
         #return a dictionary of the method and the hyperparameters
-        return (self.method, self.hyperparameters)
+        return (self.method, str(tuple(sorted(list(self.hyperparameter_parser(self.hyperparameters).items())))))

 class EstimatorNode(SklearnIndividualGenerator):
     def __init__(self, method, space, hyperparameter_parser=default_hyperparameter_parser):
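Note: the old key embedded the raw hyperparameter dict, and dicts are unhashable, so two identical nodes could never be deduplicated through a set or dict. Serializing the sorted items to a string yields a stable, hashable key. A sketch of the idea with an illustrative parameter dict:

params = {"penalty": "l2", "C": 1.0}
key = ("LogisticRegression", str(tuple(sorted(params.items()))))
print(hash(key))  # works as a set/dict key
# hash(("LogisticRegression", params))  # TypeError: unhashable type: 'dict'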
2 changes: 1 addition & 1 deletion tpot2/search_spaces/nodes/estimator_node_simple.py
@@ -12,7 +12,7 @@ class EstimatorNodeIndividual(SklearnIndividual):
     def __init__(self, method, space ) -> None:
         super().__init__()
         self.method = method
-        self.space = space
+        self.space = space #a dictionary. keys are hyperparameters, values are the space of the hyperparameter. If list, then hyperparameter is categorical. If tuple, then hyperparameter is continuous. If single value, then hyperparameter is fixed.

         self._mutate_hyperparameters()
2 changes: 1 addition & 1 deletion tpot2/search_spaces/nodes/fss_node.py
@@ -60,7 +60,7 @@ def export_pipeline(self):


     def unique_id(self):
-        return self.selected_subset_name
+        return ("FSS", self.selected_subset_name)


 class FSSNode(SklearnIndividualGenerator):
2 changes: 1 addition & 1 deletion tpot2/search_spaces/nodes/genetic_feature_selection.py
@@ -146,7 +146,7 @@ def export_pipeline(self):


     def unique_id(self):
-        return self.mask
+        return tuple(self.mask)


 class GeneticFeatureSelectorNode(SklearnIndividualGenerator):
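Note: same motivation here. A NumPy boolean mask is unhashable, and == on arrays returns an array rather than a bool, which breaks dict lookups; a tuple of its elements hashes and compares by value. For example:

import numpy as np

mask = np.array([True, False, True])
seen = {tuple(mask): "selector-A"}  # hash(mask) itself would raise TypeError
print(seen[tuple(np.array([True, False, True]))])  # selector-A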
65 changes: 57 additions & 8 deletions tpot2/search_spaces/pipelines/graph.py
@@ -10,6 +10,7 @@
 from ..nodes.estimator_node import EstimatorNodeIndividual
 from typing import Union, Callable
 import sklearn
+from functools import partial

 class GraphPipelineIndividual(SklearnIndividual):
     """
@@ -100,10 +101,10 @@ def __init__(

         self.merge_duplicated_nodes_toggle = True

+        self.graphkey = None

     def mutate(self, rng=None):
         rng = np.random.default_rng(rng)
-        self.key = None

         rng.shuffle(self.mutate_methods_list)
         for mutate_method in self.mutate_methods_list:
@@ -130,7 +131,8 @@ def mutate(self, rng=None):
                     print('something went wrong with ', mutate_method)
                 except:
                     pass

+                self.graphkey = None
                 return True

         return False
@@ -335,6 +337,9 @@ def crossover(self, ind2, rng=None):
                     print('something went wrong with ', crossover_method)
                 except:
                     pass
+
+        if finished:
+            self.graphkey = None

         return finished
@@ -661,12 +666,17 @@ def plot(self):
         plt.show()


-
-
-
-
     def unique_id(self):
-        return self
+        if self.graphkey is None:
+            #copy self.graph
+            new_graph = self.graph.copy()
+            for n in new_graph.nodes:
+                new_graph.nodes[n]['label'] = n.unique_id()
+
+            new_graph = nx.convert_node_labels_to_integers(new_graph)
+            self.graphkey = GraphKey(new_graph)
+
+        return self.graphkey


 class GraphPipeline(SklearnIndividualGenerator):
@@ -753,4 +763,43 @@ def generate(self, rng=None):
         func = rng.choice(starting_ops)
         func(rng=rng)

-    return ind
+    return ind
+
+
+
+
+class GraphKey():
+    '''
+    A class that can be used as a key for a graph.
+
+    Parameters
+    ----------
+    graph : (nx.Graph)
+        The graph to use as a key. Node Attributes are used for the hash.
+    matched_label : (str)
+        The node attribute to consider for the hash.
+    '''
+
+    def __init__(self, graph, matched_label='label') -> None: #['hyperparameters', 'method_class']) -> None:
+
+        self.graph = graph
+        self.matched_label = matched_label
+        self.node_match = partial(node_match, matched_labels=[matched_label])
+        self.key = int(nx.weisfeiler_lehman_graph_hash(self.graph, node_attr=self.matched_label), 16) #hash(tuple(sorted([val for (node, val) in self.graph.degree()])))
+
+    #If hash is different, node is definitely different
+    # https://arxiv.org/pdf/2002.06653.pdf
+    def __hash__(self) -> int:
+        return self.key
+
+    #If hash is same, use __eq__ to know if they are actually different
+    def __eq__(self, other):
+        return nx.is_isomorphic(self.graph, other.graph, node_match=self.node_match)
+
+def node_match(n1, n2, matched_labels):
+    return all( [ n1[m] == n2[m] for m in matched_labels])
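Note: the Weisfeiler-Lehman hash used above is isomorphism-invariant, so structurally identical pipelines produce the same key regardless of node insertion order, while __eq__ falls back to a full isomorphism check to rule out hash collisions. A small self-contained demonstration (node names and labels are illustrative):

import networkx as nx

# two graphs with the same shape and node labels but different node names
g1 = nx.DiGraph([(0, 1), (1, 2)])
g2 = nx.DiGraph([("a", "b"), ("b", "c")])
for g in (g1, g2):
    nx.set_node_attributes(g, "step", name="label")

h1 = nx.weisfeiler_lehman_graph_hash(g1, node_attr="label")
h2 = nx.weisfeiler_lehman_graph_hash(g2, node_attr="label")
assert h1 == h2  # isomorphic labelled graphs hash identically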

2 changes: 1 addition & 1 deletion tpot2/search_spaces/pipelines/sequential.py
@@ -47,7 +47,7 @@ def export_pipeline(self):
         return sklearn.pipeline.make_pipeline(*[step.export_pipeline() for step in self.pipeline])

     def unique_id(self):
-        return self
+        return tuple([step.unique_id() for step in self.pipeline])


 class SequentialPipeline(SklearnIndividualGenerator):
21 changes: 16 additions & 5 deletions tpot2/search_spaces/pipelines/wrapper.py
@@ -13,7 +13,8 @@ class WrapperPipelineIndividual(SklearnIndividual):
     def __init__(self,
                  nodegen: SklearnIndividualGenerator,
                  method: type,
-                 space: ConfigurationSpace,
+                 space: ConfigurationSpace,
+                 hyperparameter_parser: callable = None,
                  rng=None) -> None:


@@ -30,7 +31,7 @@ def __init__(self,
         self.space.seed(rng.integers(0, 2**32))
         self.hyperparameters = dict(self.space.sample_configuration())

-
+        self.hyperparameters_parser = hyperparameter_parser


     def mutate(self, rng=None):
@@ -53,14 +54,24 @@ def crossover(self, other, rng=None):
         return self.node.crossover(other.node, rng)

     def export_pipeline(self):
+
+        if self.hyperparameters_parser is not None:
+            final_params = self.hyperparameters_parser(self.hyperparameters)
+        else:
+            final_params = self.hyperparameters
+
         est = self.node.export_pipeline()
-        wrapped_est = self.method(est, **self.hyperparameters)
+        wrapped_est = self.method(est, **final_params)
         return wrapped_est


     def unique_id(self):
-        return self.node.unique_id()
+
+        if self.hyperparameters_parser is not None:
+            final_params = self.hyperparameters_parser(self.hyperparameters)
+        else:
+            final_params = self.hyperparameters
+
+        return (self.method, str(tuple(sorted(list(final_params.items())))), self.node.unique_id())


 class WrapperPipeline(SklearnIndividualGenerator):
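Note: hyperparameter_parser mirrors the hook EstimatorNode already accepts: a callable mapping the sampled ConfigSpace values to the kwargs the wrapped method is actually called with. A hypothetical parser (names below are illustrative, not part of this PR):

def example_parser(params):
    # translate sampled values before they reach self.method(est, **final_params)
    out = dict(params)
    if out.pop("use_early_stop", False):   # hypothetical sampled flag
        out["n_iter_no_change"] = 10       # kwarg the wrapped estimator expects
    return out

# WrapperPipeline(..., hyperparameter_parser=example_parser)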
8 changes: 4 additions & 4 deletions tpot2/tests/test_estimators.py
@@ -20,7 +20,7 @@ def tpot_estimator():
     search_space = tpot2.search_spaces.pipelines.GraphPipeline(
         root_search_space= tpot2.config.get_search_space("classifiers", n_samples=n_samples, n_features=n_features, n_classes=n_classes),
         leaf_search_space = None,
-        inner_search_space = tpot2.config.get_search_space(["selectors","transformers","classifiers"],n_samples=n_samples, n_features=n_features, n_classes=n_classes),
+        inner_search_space = tpot2.config.get_search_space(["selectors","transformers"],n_samples=n_samples, n_features=n_features, n_classes=n_classes),
         max_size = 10,
     )
     return tpot2.TPOTEstimator(
@@ -39,11 +39,11 @@ def tpot_estimator():

 @pytest.fixture
 def tpot_classifier():
-    return tpot2.tpot_estimator.templates.TPOTClassifier(max_time_seconds=10,verbose=3)
+    return tpot2.tpot_estimator.templates.TPOTClassifier(max_time_seconds=10,verbose=0)

 @pytest.fixture
 def tpot_regressor():
-    return tpot2.tpot_estimator.templates.TPOTRegressor(max_time_seconds=10,verbose=3)
+    return tpot2.tpot_estimator.templates.TPOTRegressor(max_time_seconds=10,verbose=0)


@@ -116,7 +116,7 @@ def test_tpot_regressor_fit(tpot_regressor):

     scorer = sklearn.metrics.get_scorer('neg_mean_squared_error')
     X, y = sklearn.datasets.load_diabetes(return_X_y=True)
-    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, train_size=0.75, test_size=0.25)
+    X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split(X, y, train_size=0.05, test_size=0.95)
     tpot_regressor.fit(X_train, y_train)
     assert tpot_regressor.fitted_pipeline_ is not None
4 changes: 0 additions & 4 deletions tpot2/tests/test_hello_world.py
@@ -17,10 +17,6 @@ def test_hello_world(test_input, expected):
     assert test_input is expected


-@pytest.mark.xfail(reason="Not yet implemented")
-def test_divide_by_zero():
-    assert 1 / 0 == 1
-

 def test_print(capture_stdout):
     print("Hello World")