Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 22 additions & 18 deletions autoPyTorch/api/base_task.py
Original file line number Diff line number Diff line change
Expand Up @@ -551,7 +551,7 @@ def _do_dummy_prediction(self, num_run: int) -> None:
% (str(status), str(additional_info))
)

def _do_traditional_prediction(self, num_run: int, time_left: int, func_eval_time_limit: int
def _do_traditional_prediction(self, num_run: int, time_left: int, func_eval_time_limit_secs: int
) -> int:
"""
Fits traditional machine learning algorithms to the provided dataset, while
Expand All @@ -567,7 +567,7 @@ def _do_traditional_prediction(self, num_run: int, time_left: int, func_eval_tim
Hard limit on how many machine learning algorithms can be fit. Depending on how
fast a traditional machine learning algorithm trains, it will allow multiple
models to be fitted.
func_eval_time_limit: (int)
func_eval_time_limit_secs: (int)
Maximum time, in seconds, that each algorithm is allowed to take during training

Returns:
Expand All @@ -593,8 +593,8 @@ def _do_traditional_prediction(self, num_run: int, time_left: int, func_eval_tim

# Only launch a task if there is time
start_time = time.time()
if time_left >= func_eval_time_limit:
self._logger.info(f"{n_r}: Started fitting {classifier} with cutoff={func_eval_time_limit}")
if time_left >= func_eval_time_limit_secs:
self._logger.info(f"{n_r}: Started fitting {classifier} with cutoff={func_eval_time_limit_secs}")
scenario_mock = unittest.mock.Mock()
scenario_mock.wallclock_limit = time_left
# This stats object is a hack - maybe the SMAC stats object should
Expand All @@ -618,7 +618,7 @@ def _do_traditional_prediction(self, num_run: int, time_left: int, func_eval_tim
classifier,
self._dask_client.submit(
ta.run, config=classifier,
cutoff=func_eval_time_limit,
cutoff=func_eval_time_limit_secs,
)
])

Expand All @@ -637,7 +637,7 @@ def _do_traditional_prediction(self, num_run: int, time_left: int, func_eval_tim

# How many workers to wait before starting fitting the next iteration
workers_to_wait = 1
if n_r >= total_number_classifiers - 1 or time_left <= func_eval_time_limit:
if n_r >= total_number_classifiers - 1 or time_left <= func_eval_time_limit_secs:
# If on the last iteration, flush out all tasks
workers_to_wait = len(dask_futures)

Expand Down Expand Up @@ -672,7 +672,7 @@ def _do_traditional_prediction(self, num_run: int, time_left: int, func_eval_tim
time_left -= int(time.time() - start_time)

# Exit if no more time is available for a new classifier
if time_left < func_eval_time_limit:
if time_left < func_eval_time_limit_secs:
self._logger.warning("Not enough time to fit all traditional machine learning models."
"Please consider increasing the run time to further improve performance.")
break
Expand All @@ -686,7 +686,7 @@ def _search(
budget_type: Optional[str] = None,
budget: Optional[float] = None,
total_walltime_limit: int = 100,
func_eval_time_limit: Optional[int] = None,
func_eval_time_limit_secs: Optional[int] = None,
enable_traditional_pipeline: bool = True,
memory_limit: Optional[int] = 4096,
smac_scenario_args: Optional[Dict[str, Any]] = None,
Expand Down Expand Up @@ -721,13 +721,17 @@ def _search(
in seconds for the search of appropriate models.
By increasing this value, autopytorch has a higher
chance of finding better models.
func_eval_time_limit (int), (default=None): Time limit
func_eval_time_limit_secs (int), (default=None): Time limit
for a single call to the machine learning model.
Model fitting will be terminated if the machine
learning algorithm runs over the time limit. Set
this value high enough so that typical machine
learning algorithms can be fit on the training
data.
When set to None, this time will automatically be set to
total_walltime_limit // 2 to allow enough time to fit
at least 2 individual machine learning algorithms.
Set to np.inf in case no time limit is desired.
enable_traditional_pipeline (bool), (default=True):
We fit traditional machine learning algorithms
(LightGBM, CatBoost, RandomForest, ExtraTrees, KNN, SVM)
Expand Down Expand Up @@ -823,22 +827,22 @@ def _search(
# Handle time resource allocation
elapsed_time = self._stopwatch.wall_elapsed(experiment_task_name)
time_left_for_modelfit = int(max(0, total_walltime_limit - elapsed_time))
if func_eval_time_limit is None or func_eval_time_limit > time_left_for_modelfit:
if func_eval_time_limit_secs is None or func_eval_time_limit_secs > time_left_for_modelfit:
self._logger.warning(
'Time limit for a single run is higher than total time '
'limit. Capping the limit for a single run to the total '
'time given to SMAC (%f)' % time_left_for_modelfit
)
func_eval_time_limit = time_left_for_modelfit
func_eval_time_limit_secs = time_left_for_modelfit

# Make sure that at least 2 models are created for the ensemble process
num_models = time_left_for_modelfit // func_eval_time_limit
num_models = time_left_for_modelfit // func_eval_time_limit_secs
if num_models < 2:
func_eval_time_limit = time_left_for_modelfit // 2
func_eval_time_limit_secs = time_left_for_modelfit // 2
self._logger.warning(
"Capping the func_eval_time_limit to {} to have "
"Capping the func_eval_time_limit_secs to {} to have "
"time for a least 2 models to ensemble.".format(
func_eval_time_limit
func_eval_time_limit_secs
)
)

Expand All @@ -860,10 +864,10 @@ def _search(
elapsed_time = self._stopwatch.wall_elapsed(self.dataset_name)
# We want time for at least 1 Neural network in SMAC
time_for_traditional = int(
self._time_for_task - elapsed_time - func_eval_time_limit
self._time_for_task - elapsed_time - func_eval_time_limit_secs
)
num_run = self._do_traditional_prediction(
num_run=num_run + 1, func_eval_time_limit=func_eval_time_limit,
num_run=num_run + 1, func_eval_time_limit_secs=func_eval_time_limit_secs,
time_left=time_for_traditional,
)
self._stopwatch.stop_task(traditional_task_name)
Expand Down Expand Up @@ -923,7 +927,7 @@ def _search(
dataset_name=dataset.dataset_name,
backend=self._backend,
total_walltime_limit=total_walltime_limit,
func_eval_time_limit=func_eval_time_limit,
func_eval_time_limit_secs=func_eval_time_limit_secs,
dask_client=self._dask_client,
memory_limit=self._memory_limit,
n_jobs=self.n_jobs,
Expand Down
12 changes: 8 additions & 4 deletions autoPyTorch/api/tabular_classification.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def search(
budget_type: Optional[str] = None,
budget: Optional[float] = None,
total_walltime_limit: int = 100,
func_eval_time_limit: Optional[int] = None,
func_eval_time_limit_secs: Optional[int] = None,
enable_traditional_pipeline: bool = True,
memory_limit: Optional[int] = 4096,
smac_scenario_args: Optional[Dict[str, Any]] = None,
Expand Down Expand Up @@ -156,17 +156,21 @@ def search(
in seconds for the search of appropriate models.
By increasing this value, autopytorch has a higher
chance of finding better models.
func_eval_time_limit (int), (default=None): Time limit
func_eval_time_limit_secs (int), (default=None): Time limit
for a single call to the machine learning model.
Model fitting will be terminated if the machine
learning algorithm runs over the time limit. Set
this value high enough so that typical machine
learning algorithms can be fit on the training
data.
When set to None, this time will automatically be set to
total_walltime_limit // 2 to allow enough time to fit
at least 2 individual machine learning algorithms.
Set to np.inf in case no time limit is desired.
enable_traditional_pipeline (bool), (default=True):
We fit traditional machine learning algorithms
(LightGBM, CatBoost, RandomForest, ExtraTrees, KNN, SVM)
prior building PyTorch Neural Networks. You can disable this
before building PyTorch Neural Networks. You can disable this
feature by turning this flag to False. All machine learning
algorithms that are fitted during search() are considered for
ensemble building.
Expand Down Expand Up @@ -232,7 +236,7 @@ def search(
budget_type=budget_type,
budget=budget,
total_walltime_limit=total_walltime_limit,
func_eval_time_limit=func_eval_time_limit,
func_eval_time_limit_secs=func_eval_time_limit_secs,
enable_traditional_pipeline=enable_traditional_pipeline,
memory_limit=memory_limit,
smac_scenario_args=smac_scenario_args,
Expand Down
10 changes: 7 additions & 3 deletions autoPyTorch/api/tabular_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -114,7 +114,7 @@ def search(
budget_type: Optional[str] = None,
budget: Optional[float] = None,
total_walltime_limit: int = 100,
func_eval_time_limit: Optional[int] = None,
func_eval_time_limit_secs: Optional[int] = None,
enable_traditional_pipeline: bool = False,
memory_limit: Optional[int] = 4096,
smac_scenario_args: Optional[Dict[str, Any]] = None,
Expand Down Expand Up @@ -148,13 +148,17 @@ def search(
in seconds for the search of appropriate models.
By increasing this value, autopytorch has a higher
chance of finding better models.
func_eval_time_limit (int), (default=None): Time limit
func_eval_time_limit_secs (int), (default=None): Time limit
for a single call to the machine learning model.
Model fitting will be terminated if the machine
learning algorithm runs over the time limit. Set
this value high enough so that typical machine
learning algorithms can be fit on the training
data.
When set to None, this time will automatically be set to
total_walltime_limit // 2 to allow enough time to fit
at least 2 individual machine learning algorithms.
Set to np.inf in case no time limit is desired.
enable_traditional_pipeline (bool), (default=False):
Not enabled for regression. This flag is here to comply
with the API.
Expand Down Expand Up @@ -220,7 +224,7 @@ def search(
budget_type=budget_type,
budget=budget,
total_walltime_limit=total_walltime_limit,
func_eval_time_limit=func_eval_time_limit,
func_eval_time_limit_secs=func_eval_time_limit_secs,
enable_traditional_pipeline=enable_traditional_pipeline,
memory_limit=memory_limit,
smac_scenario_args=smac_scenario_args,
Expand Down
8 changes: 4 additions & 4 deletions autoPyTorch/optimizer/smbo.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,7 @@ def __init__(self,
dataset_name: str,
backend: Backend,
total_walltime_limit: float,
func_eval_time_limit: float,
func_eval_time_limit_secs: float,
memory_limit: typing.Optional[int],
metric: autoPyTorchMetric,
watcher: StopWatch,
Expand Down Expand Up @@ -120,7 +120,7 @@ def __init__(self,
An interface with disk
total_walltime_limit (float):
The maximum allowed time for this job
func_eval_time_limit (float):
func_eval_time_limit_secs (float):
How long, in seconds, each individual task is allowed to run
memory_limit (typing.Optional[int]):
Maximum allowed CPU memory this task can use
Expand Down Expand Up @@ -180,7 +180,7 @@ def __init__(self,
# and a bunch of useful limits
self.worst_possible_result = get_cost_of_crash(self.metric)
self.total_walltime_limit = int(total_walltime_limit)
self.func_eval_time_limit = int(func_eval_time_limit)
self.func_eval_time_limit_secs = int(func_eval_time_limit_secs)
self.memory_limit = memory_limit
self.watcher = watcher
self.seed = seed
Expand Down Expand Up @@ -265,7 +265,7 @@ def run_smbo(self, func: typing.Optional[typing.Callable] = None
scenario_dict = {
'abort_on_first_run_crash': False,
'cs': self.config_space,
'cutoff_time': self.func_eval_time_limit,
'cutoff_time': self.func_eval_time_limit_secs,
'deterministic': 'true',
'instances': instances,
'memory_limit': self.memory_limit,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,16 @@
SVMModel)

_classifiers = {
# Sort by fit importance
# Sorted by robustness, most robust models first.
# Depending on the allocated time budget, only the
# top models from this dict are to be fitted.
# LGBM is the most robust model, with
# internal measures to prevent crashes and overfitting.
# Additionally, it is one of the state-of-the-art
# methods for tabular prediction.
# Then follow with catboost for categorical-heavy
# datasets. The other models are complementary and
# their ordering is not critical.
'lgb': LGBModel,
'catboost': CatboostModel,
'random_forest': RFModel,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@
y_test=y_test.copy(),
optimize_metric='accuracy',
total_walltime_limit=300,
func_eval_time_limit=50
func_eval_time_limit_secs=50
)

############################################################################
Expand Down
2 changes: 1 addition & 1 deletion examples/tabular/20_basics/example_tabular_regression.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@
y_test=y_test_scaled.copy(),
optimize_metric='r2',
total_walltime_limit=300,
func_eval_time_limit=50,
func_eval_time_limit_secs=50,
enable_traditional_pipeline=False,
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ def get_search_space_updates():
y_test=y_test.copy(),
optimize_metric='accuracy',
total_walltime_limit=300,
func_eval_time_limit=50
func_eval_time_limit_secs=50
)

############################################################################
Expand Down Expand Up @@ -119,7 +119,7 @@ def get_search_space_updates():
y_test=y_test.copy(),
optimize_metric='accuracy',
total_walltime_limit=300,
func_eval_time_limit=50
func_eval_time_limit_secs=50
)

############################################################################
Expand Down
6 changes: 3 additions & 3 deletions examples/tabular/40_advanced/example_resampling_strategy.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@
y_test=y_test.copy(),
optimize_metric='accuracy',
total_walltime_limit=150,
func_eval_time_limit=30
func_eval_time_limit_secs=30
)

############################################################################
Expand Down Expand Up @@ -104,7 +104,7 @@
y_test=y_test.copy(),
optimize_metric='accuracy',
total_walltime_limit=150,
func_eval_time_limit=30
func_eval_time_limit_secs=30
)

############################################################################
Expand Down Expand Up @@ -145,7 +145,7 @@
y_test=y_test.copy(),
optimize_metric='accuracy',
total_walltime_limit=150,
func_eval_time_limit=30
func_eval_time_limit_secs=30
)

############################################################################
Expand Down
6 changes: 3 additions & 3 deletions test/test_api/test_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def test_tabular_classification(openml_id, resampling_strategy, backend):
X_test=X_test, y_test=y_test,
optimize_metric='accuracy',
total_walltime_limit=150,
func_eval_time_limit=50,
func_eval_time_limit_secs=50,
enable_traditional_pipeline=False,
)

Expand Down Expand Up @@ -230,7 +230,7 @@ def test_tabular_regression(openml_name, resampling_strategy, backend):
X_test=X_test, y_test=y_test,
optimize_metric='r2',
total_walltime_limit=50,
func_eval_time_limit=10,
func_eval_time_limit_secs=10,
enable_traditional_pipeline=False,
)

Expand Down Expand Up @@ -390,7 +390,7 @@ def test_tabular_input_support(openml_id, backend):
X_test=X_test, y_test=y_test,
optimize_metric='accuracy',
total_walltime_limit=150,
func_eval_time_limit=50,
func_eval_time_limit_secs=50,
enable_traditional_pipeline=False,
load_models=False,
)