diff --git a/test/test_api/test_api.py b/test/test_api/test_api.py index 1acd412e2..971552f25 100644 --- a/test/test_api/test_api.py +++ b/test/test_api/test_api.py @@ -105,33 +105,33 @@ def test_tabular_classification(openml_id, resampling_strategy, backend): # Search for an existing run key in disc. A individual model might have # a timeout and hence was not written to disc for i, (run_key, value) in enumerate(estimator.run_history.data.items()): - if i == 0: - # Ignore dummy run - continue if 'SUCCESS' not in str(value.status): continue run_key_model_run_dir = estimator._backend.get_numrun_directory( - estimator.seed, run_key.config_id, run_key.budget) + estimator.seed, run_key.config_id + 1, run_key.budget) if os.path.exists(run_key_model_run_dir): + # Runkey config id is different from the num_run + # more specifically num_run = config_id + 1(dummy) + successful_num_run = run_key.config_id + 1 break if resampling_strategy == HoldoutValTypes.holdout_validation: model_file = os.path.join(run_key_model_run_dir, - f"{estimator.seed}.{run_key.config_id}.{run_key.budget}.model") + f"{estimator.seed}.{successful_num_run}.{run_key.budget}.model") assert os.path.exists(model_file), model_file model = estimator._backend.load_model_by_seed_and_id_and_budget( - estimator.seed, run_key.config_id, run_key.budget) + estimator.seed, successful_num_run, run_key.budget) assert isinstance(model.named_steps['network'].get_network(), torch.nn.Module) elif resampling_strategy == CrossValTypes.k_fold_cross_validation: model_file = os.path.join( run_key_model_run_dir, - f"{estimator.seed}.{run_key.config_id}.{run_key.budget}.cv_model" + f"{estimator.seed}.{successful_num_run}.{run_key.budget}.cv_model" ) assert os.path.exists(model_file), model_file model = estimator._backend.load_cv_model_by_seed_and_id_and_budget( - estimator.seed, run_key.config_id, run_key.budget) + estimator.seed, successful_num_run, run_key.budget) assert isinstance(model, VotingClassifier) assert len(model.estimators_) == 3 assert isinstance(model.estimators_[0].named_steps['network'].get_network(), @@ -142,7 +142,7 @@ def test_tabular_classification(openml_id, resampling_strategy, backend): # Make sure that predictions on the test data are printed and make sense test_prediction = os.path.join(run_key_model_run_dir, estimator._backend.get_prediction_filename( - 'test', estimator.seed, run_key.config_id, + 'test', estimator.seed, successful_num_run, run_key.budget)) assert os.path.exists(test_prediction), test_prediction assert np.shape(np.load(test_prediction, allow_pickle=True))[0] == np.shape(X_test)[0] @@ -152,7 +152,7 @@ def test_tabular_classification(openml_id, resampling_strategy, backend): ensemble_prediction = os.path.join(run_key_model_run_dir, estimator._backend.get_prediction_filename( 'ensemble', - estimator.seed, run_key.config_id, + estimator.seed, successful_num_run, run_key.budget)) assert os.path.exists(ensemble_prediction), ensemble_prediction assert np.shape(np.load(ensemble_prediction, allow_pickle=True))[0] == np.shape( @@ -213,10 +213,16 @@ def test_tabular_regression(openml_name, resampling_strategy, backend): X_train, X_test, y_train, y_test = sklearn.model_selection.train_test_split( X, y, random_state=1) + include = None + # for python less than 3.7, learned entity embedding + # is not able to be stored on disk (only on CI) + if sys.version_info < (3, 7): + include = {'network_embedding': ['NoEmbedding']} # Search for a good configuration estimator = TabularRegressionTask( backend=backend, resampling_strategy=resampling_strategy, + include_components=include ) estimator.search( @@ -267,32 +273,32 @@ def test_tabular_regression(openml_name, resampling_strategy, backend): # Search for an existing run key in disc. A individual model might have # a timeout and hence was not written to disc for i, (run_key, value) in enumerate(estimator.run_history.data.items()): - if i == 0: - # Ignore dummy run - continue if 'SUCCESS' not in str(value.status): continue run_key_model_run_dir = estimator._backend.get_numrun_directory( - estimator.seed, run_key.config_id, run_key.budget) + estimator.seed, run_key.config_id + 1, run_key.budget) if os.path.exists(run_key_model_run_dir): + # Runkey config id is different from the num_run + # more specifically num_run = config_id + 1(dummy) + successful_num_run = run_key.config_id + 1 break if resampling_strategy == HoldoutValTypes.holdout_validation: model_file = os.path.join(run_key_model_run_dir, - f"{estimator.seed}.{run_key.config_id}.{run_key.budget}.model") + f"{estimator.seed}.{successful_num_run}.{run_key.budget}.model") assert os.path.exists(model_file), model_file model = estimator._backend.load_model_by_seed_and_id_and_budget( - estimator.seed, run_key.config_id, run_key.budget) + estimator.seed, successful_num_run, run_key.budget) assert isinstance(model.named_steps['network'].get_network(), torch.nn.Module) elif resampling_strategy == CrossValTypes.k_fold_cross_validation: model_file = os.path.join( run_key_model_run_dir, - f"{estimator.seed}.{run_key.config_id}.{run_key.budget}.cv_model" + f"{estimator.seed}.{successful_num_run}.{run_key.budget}.cv_model" ) assert os.path.exists(model_file), model_file model = estimator._backend.load_cv_model_by_seed_and_id_and_budget( - estimator.seed, run_key.config_id, run_key.budget) + estimator.seed, successful_num_run, run_key.budget) assert isinstance(model, VotingRegressor) assert len(model.estimators_) == 3 assert isinstance(model.estimators_[0].named_steps['network'].get_network(), @@ -303,7 +309,7 @@ def test_tabular_regression(openml_name, resampling_strategy, backend): # Make sure that predictions on the test data are printed and make sense test_prediction = os.path.join(run_key_model_run_dir, estimator._backend.get_prediction_filename( - 'test', estimator.seed, run_key.config_id, + 'test', estimator.seed, successful_num_run, run_key.budget)) assert os.path.exists(test_prediction), test_prediction assert np.shape(np.load(test_prediction, allow_pickle=True))[0] == np.shape(X_test)[0] @@ -313,7 +319,7 @@ def test_tabular_regression(openml_name, resampling_strategy, backend): ensemble_prediction = os.path.join(run_key_model_run_dir, estimator._backend.get_prediction_filename( 'ensemble', - estimator.seed, run_key.config_id, + estimator.seed, successful_num_run, run_key.budget)) assert os.path.exists(ensemble_prediction), ensemble_prediction assert np.shape(np.load(ensemble_prediction, allow_pickle=True))[0] == np.shape(