Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add checks for invalid metrics and invalid dataset #382

Merged
merged 1 commit into from
Mar 22, 2017
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 19 additions & 1 deletion tpot/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
from datetime import datetime
from pathos.multiprocessing import ProcessPool


import numpy as np
import deap
from deap import base, creator, tools, gp
Expand All @@ -39,6 +40,7 @@
from sklearn.preprocessing import FunctionTransformer
from sklearn.ensemble import VotingClassifier
from sklearn.metrics.scorer import make_scorer
from sklearn.tree import DecisionTreeRegressor, DecisionTreeClassifier

from update_checker import update_check

Expand Down Expand Up @@ -110,7 +112,8 @@ def __init__(self, generations=100, population_size=100, offspring_size=None,
TPOT assumes that this scoring function should be maximized, i.e.,
higher is better.

Offers the same options as sklearn.model_selection.cross_val_score:
Offers the same options as sklearn.model_selection.cross_val_score, as well as
a built-in "balanced_accuracy" score:

['accuracy', 'adjusted_rand_score', 'average_precision', 'f1',
'f1_macro', 'f1_micro', 'f1_samples', 'f1_weighted',
Expand Down Expand Up @@ -233,6 +236,9 @@ def __init__(self, generations=100, population_size=100, offspring_size=None,
SCORERS[scoring_name] = make_scorer(scoring, greater_is_better=greater_is_better)
self.scoring_function = scoring_name
else:
if scoring not in SCORERS:
raise TypeError('The scoring function {} is not available. '
'Please choose scoring function on TPOT manual'.format(scoring))
self.scoring_function = scoring

self.cv = cv
Expand Down Expand Up @@ -335,6 +341,18 @@ def fit(self, features, classes, sample_weight=None):
"""
features = features.astype(np.float64)

# check input data format
if self.classification:
clf = DecisionTreeClassifier(max_depth=5)
else:
clf = DecisionTreeRegressor(max_depth=5)

try:
clf = clf.fit(features, classes)
except:
raise TypeError('Warning: TypeError in input dataset. Please check your data format! \n'
'Tips: features need to a 2-D array but classes should be a 1-D array.')

# Set the seed for the GP run
if self.random_state is not None:
random.seed(self.random_state) # deap uses the random module
Expand Down