From 8e30c5a3635b04558d7db3934ecaaa6af0fb0dd2 Mon Sep 17 00:00:00 2001 From: Ozgur Aslan Date: Sat, 7 Dec 2024 12:49:47 +0300 Subject: [PATCH 1/3] Float rounds are adjusted & case sensitivity in eval metrics is removed --- flexml/_model_tuner.py | 16 ++++++++-------- flexml/config/supervised_config.py | 8 ++++---- flexml/structures/supervised_base.py | 20 +++++++++++--------- 3 files changed, 23 insertions(+), 21 deletions(-) diff --git a/flexml/_model_tuner.py b/flexml/_model_tuner.py index 961a08a..f587f4b 100644 --- a/flexml/_model_tuner.py +++ b/flexml/_model_tuner.py @@ -202,19 +202,19 @@ def _model_evaluator(self, eval_metric = eval_metric.lower() if eval_metric == 'r2': - return r2_score(self.y_test, model.predict(self.X_test)) + return round(r2_score(self.y_test, model.predict(self.X_test)), 6) elif eval_metric == 'mae': - return mean_absolute_error(self.y_test, model.predict(self.X_test)) + return round(mean_absolute_error(self.y_test, model.predict(self.X_test)), 6) elif eval_metric == 'mse': - return mean_squared_error(self.y_test, model.predict(self.X_test)) + return round(mean_squared_error(self.y_test, model.predict(self.X_test)), 6) elif eval_metric == 'accuracy': - return accuracy_score(self.y_test, model.predict(self.X_test)) + return round(accuracy_score(self.y_test, model.predict(self.X_test)), 6) elif eval_metric == 'precision': - return precision_score(self.y_test, model.predict(self.X_test)) + return round(precision_score(self.y_test, model.predict(self.X_test)), 6) elif eval_metric == 'recall': - return recall_score(self.y_test, model.predict(self.X_test)) + return round(recall_score(self.y_test, model.predict(self.X_test)), 6) elif eval_metric == 'f1': - return f1_score(self.y_test, model.predict(self.X_test)) + return round(f1_score(self.y_test, model.predict(self.X_test)), 6) else: error_msg = "Error while evaluating the current model during the model tuning process. 
The eval_metric should be one of the following: 'r2', 'mae', 'mse', 'accuracy', 'precision', 'recall', 'f1'" self.logger.error(error_msg) @@ -526,7 +526,7 @@ def objective(trial): # Update the best score and best hyperparameters If the current score is better than the best one if model_stats['tuned_model_score'] is None or score > model_stats['tuned_model_score']: - model_stats['tuned_model_score'] = round(score, 4) + model_stats['tuned_model_score'] = round(score, 6) model_stats['tuned_model'] = test_model return score diff --git a/flexml/config/supervised_config.py b/flexml/config/supervised_config.py index 8ffea3a..45a7ac6 100644 --- a/flexml/config/supervised_config.py +++ b/flexml/config/supervised_config.py @@ -11,9 +11,9 @@ # Regression & Classification Evaluation Metrics EVALUATION_METRICS = { - "Regression": {"DEFAULT": "r2", - "ALL": ["r2", "mae", "mse", "rmse"]}, + "Regression": {"DEFAULT": "R2", + "ALL": ["R2", "MAE", "MSE", "RMSE"]}, - "Classification": {"DEFAULT": "accuracy", - "ALL": ["accuracy", "precision", "recall", "f1_score"]} + "Classification": {"DEFAULT": "Accuracy", + "ALL": ["Accuracy", "Precision", "Recall", "F1 Score"]} } diff --git a/flexml/structures/supervised_base.py b/flexml/structures/supervised_base.py index a87a24f..31b29af 100644 --- a/flexml/structures/supervised_base.py +++ b/flexml/structures/supervised_base.py @@ -188,7 +188,9 @@ def __eval_metric_checker(self, eval_metric: Optional[str] = None) -> str: self.__logger.error(error_msg) raise ValueError(error_msg) - if eval_metric not in self.__ALL_EVALUATION_METRICS: + if ((self.__ML_TASK_TYPE == "Classification" and eval_metric.lower().capitalize() not in self.__ALL_EVALUATION_METRICS) or + (self.__ML_TASK_TYPE == "Regression" and eval_metric.upper() not in self.__ALL_EVALUATION_METRICS)): + error_msg = f"{eval_metric} is not a valid evaluation metric for {self.__ML_TASK_TYPE}, expected one of the following: {self.__ALL_EVALUATION_METRICS}" self.__logger.error(error_msg) raise ValueError(error_msg) @@ -237,10 +239,10 @@ def __evaluate_model_perf(self, y_test, y_pred): """ if self.__ML_TASK_TYPE == "Regression": - r2 = round(r2_score(y_test, y_pred), 4) - mae = round(mean_absolute_error(y_test, y_pred), 4) - mse = round(mean_squared_error(y_test, y_pred), 4) - rmse = round(np.sqrt(mse), 4) + r2 = round(r2_score(y_test, y_pred), 6) + mae = round(mean_absolute_error(y_test, y_pred), 6) + mse = round(mean_squared_error(y_test, y_pred), 6) + rmse = round(np.sqrt(mse), 6) return { "r2": r2, "mae": mae, @@ -249,10 +251,10 @@ def __evaluate_model_perf(self, y_test, y_pred): } elif self.__ML_TASK_TYPE == "Classification": - accuracy = round(accuracy_score(y_test, y_pred), 4) - precision = round(precision_score(y_test, y_pred, average='weighted'), 4) - recall = round(recall_score(y_test, y_pred, average='weighted'), 4) - f1 = round(f1_score(y_test, y_pred, average='weighted'), 4) + accuracy = round(accuracy_score(y_test, y_pred), 6) + precision = round(precision_score(y_test, y_pred, average='weighted'), 6) + recall = round(recall_score(y_test, y_pred, average='weighted'), 6) + f1 = round(f1_score(y_test, y_pred, average='weighted'), 6) return { "accuracy": accuracy, "precision": precision, From b894e5e96a79ffdd084ed6fb3572efdf0fcf1d3f Mon Sep 17 00:00:00 2001 From: Ozgur Aslan Date: Tue, 10 Dec 2024 20:30:43 +0300 Subject: [PATCH 2/3] Error fix for #8e30c5a commit * Since custom metrics are needed for model tuning processes, custom eval_metric_reveiser function is developed * eval_metric_checker at 
SupervisedBase is moved to general validator module so that both model tuner and SupervisedBase can use it --- flexml/_model_tuner.py | 102 ++++++++++++++++----------- flexml/helpers/__init__.py | 1 + flexml/helpers/validators.py | 62 ++++++++++++++++ flexml/structures/supervised_base.py | 89 ++++++++--------------- 4 files changed, 154 insertions(+), 100 deletions(-) create mode 100644 flexml/helpers/__init__.py create mode 100644 flexml/helpers/validators.py diff --git a/flexml/_model_tuner.py b/flexml/_model_tuner.py index f587f4b..be01339 100644 --- a/flexml/_model_tuner.py +++ b/flexml/_model_tuner.py @@ -14,6 +14,7 @@ f1_score) from flexml.logger.logger import get_logger +from flexml.helpers import eval_metric_checker class ModelTuner: @@ -66,6 +67,25 @@ def __init__(self, self.logger = get_logger(__name__, "PROD", logging_to_file) + @staticmethod + def __eval_metric_revieser(eval_metric: str) -> str: + """ + Scikit-learn based hyperparameter optimization methods (GridSearch & Randomized Search) require spesific namings for evaluation metrics + + This method is used to revise the evaluation metric name for the optimization process + + Parameters + ---------- + eval_metric : str + The evaluation metric + + Returns + ------- + str + The revised evaluation metric name. e.g. 'R2' to 'r2, 'Accuracy' to 'accuracy', 'F1 Score' to 'f1_weighted' etc. + """ + return eval_metric.lower() if eval_metric != 'F1 Score' else 'f1_weighted' + def _param_grid_validator(self, model_available_params: dict, param_grid: dict) -> dict: @@ -185,38 +205,38 @@ def _model_evaluator(self, eval_metric : str The evaluation metric that will be used to evaluate the model. It can be one of the following: - * 'r2' for R^2 score + * 'R2' for R^2 score - * 'mae' for Mean Absolute Error + * 'MAE' for Mean Absolute Error - * 'mse' for Mean Squared Error + * 'MSE' for Mean Squared Error - * 'accuracy' for Accuracy + * 'Accuracy' for Accuracy - * 'precision' for Precision + * 'Precision' for Precision - * 'recall' for Recall + * 'Recall' for Recall - * 'f1' for F1 score + * 'F1 Score' for F1 score """ - eval_metric = eval_metric.lower() + eval_metric = eval_metric_checker(self.ml_problem_type, eval_metric) - if eval_metric == 'r2': + if eval_metric == 'R2': return round(r2_score(self.y_test, model.predict(self.X_test)), 6) - elif eval_metric == 'mae': + elif eval_metric == 'MAE': return round(mean_absolute_error(self.y_test, model.predict(self.X_test)), 6) - elif eval_metric == 'mse': + elif eval_metric == 'MSE': return round(mean_squared_error(self.y_test, model.predict(self.X_test)), 6) - elif eval_metric == 'accuracy': + elif eval_metric == 'Accuracy': return round(accuracy_score(self.y_test, model.predict(self.X_test)), 6) - elif eval_metric == 'precision': + elif eval_metric == 'Precision': return round(precision_score(self.y_test, model.predict(self.X_test)), 6) - elif eval_metric == 'recall': + elif eval_metric == 'Recall': return round(recall_score(self.y_test, model.predict(self.X_test)), 6) - elif eval_metric == 'f1': + elif eval_metric == 'F1 Score': return round(f1_score(self.y_test, model.predict(self.X_test)), 6) else: - error_msg = "Error while evaluating the current model during the model tuning process. The eval_metric should be one of the following: 'r2', 'mae', 'mse', 'accuracy', 'precision', 'recall', 'f1'" + error_msg = "Error while evaluating the current model during the model tuning process. 
The eval_metric should be one of the following: 'R2', 'MAE', 'MSE', 'Accuracy', 'Precision', 'Recall', 'F1 Score'" self.logger.error(error_msg) raise ValueError(error_msg) @@ -241,19 +261,19 @@ def grid_search(self, eval_metric : str The evaluation metric that will be used to evaluate the model. It can be one of the following: - * 'r2' for R^2 score + * 'R2' for R^2 score - * 'mae' for Mean Absolute Error + * 'MAE' for Mean Absolute Error - * 'mse' for Mean Squared Error + * 'MSE' for Mean Squared Error - * 'accuracy' for Accuracy + * 'Accuracy' for Accuracy - * 'precision' for Precision + * 'Precision' for Precision - * 'recall' for Recall + * 'Recall' for Recall - * 'f1' for F1 score + * 'F1 Score' for F1 score cv : int (default=3) The number of cross-validation splits. The default is 3. @@ -291,10 +311,11 @@ def grid_search(self, """ model_stats = self._setup_tuning("GridSearchCV", model, param_grid, n_iter=None, cv=cv, n_jobs=n_jobs) param_grid = model_stats['tuning_param_grid'] + scoring_eval_metric = self.__eval_metric_revieser(eval_metric) try: t_start = time() - search_result = GridSearchCV(model, param_grid, scoring=eval_metric, cv=cv, n_jobs=n_jobs, verbose=verbose).fit(self.X_train, self.y_train) + search_result = GridSearchCV(model, param_grid, scoring=scoring_eval_metric, cv=cv, n_jobs=n_jobs, verbose=verbose).fit(self.X_train, self.y_train) t_end = time() time_taken = round(t_end - t_start, 2) @@ -330,19 +351,19 @@ def random_search(self, eval_metric : str The evaluation metric that will be used to evaluate the model. It can be one of the following: - * 'r2' for R^2 score + * 'R2' for R^2 score - * 'mae' for Mean Absolute Error + * 'MAE' for Mean Absolute Error - * 'mse' for Mean Squared Error + * 'MSE' for Mean Squared Error - * 'accuracy' for Accuracy + * 'Accuracy' for Accuracy - * 'precision' for Precision + * 'Precision' for Precision - * 'recall' for Recall + * 'Recall' for Recall - * 'f1' for F1 score + * 'F1 Score' for F1 score n_iter : int, optional (default=10) The number of trials. The default is 10. @@ -374,10 +395,11 @@ def random_search(self, """ model_stats = self._setup_tuning("randomized_search", model, param_grid, n_iter=n_iter, cv=cv, n_jobs=n_jobs) param_grid = model_stats['tuning_param_grid'] + scoring_eval_metric = self.__eval_metric_revieser(eval_metric) try: t_start = time() - search_result = RandomizedSearchCV(estimator=model, param_distributions=param_grid, n_iter=n_iter, scoring=eval_metric, cv=cv, n_jobs=n_jobs, verbose=verbose).fit(self.X_train, self.y_train) + search_result = RandomizedSearchCV(estimator=model, param_distributions=param_grid, n_iter=n_iter, scoring=scoring_eval_metric, cv=cv, n_jobs=n_jobs, verbose=verbose).fit(self.X_train, self.y_train) t_end = time() time_taken = round(t_end - t_start, 2) @@ -413,19 +435,19 @@ def optuna_search(self, eval_metric : str The evaluation metric that will be used to evaluate the model. It can be one of the following: - * 'r2' for R^2 score + * 'R2' for R^2 score - * 'mae' for Mean Absolute Error + * 'MAE' for Mean Absolute Error - * 'mse' for Mean Squared Error + * 'MSE' for Mean Squared Error - * 'accuracy' for Accuracy + * 'Accuracy' for Accuracy - * 'precision' for Precision + * 'Precision' for Precision - * 'recall' for Recall + * 'Recall' for Recall - * 'f1' for F1 score + * 'F1 Score' for F1 score n_iter : int, optional (default=100) The number of trials. The default is 100. 
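For reference, the scorer-name revision that __eval_metric_revieser performs before handing eval_metric to GridSearchCV/RandomizedSearchCV amounts to the small mapping sketched below. This is a standalone illustration, not flexml code: the LogisticRegression estimator and its parameter grid are placeholders, and only classification scorer names ('accuracy', 'f1_weighted') are exercised since those are standard scikit-learn scoring strings.

    # Standalone sketch of the name revision described in __eval_metric_revieser's docstring.
    # The estimator and parameter grid are illustrative placeholders, not flexml internals.
    from sklearn.datasets import make_classification
    from sklearn.linear_model import LogisticRegression
    from sklearn.model_selection import GridSearchCV

    def revise_eval_metric(eval_metric: str) -> str:
        # 'F1 Score' maps to scikit-learn's 'f1_weighted'; everything else is lower-cased,
        # e.g. 'R2' -> 'r2', 'Accuracy' -> 'accuracy'.
        return "f1_weighted" if eval_metric == "F1 Score" else eval_metric.lower()

    X, y = make_classification(n_samples=200, random_state=42)
    scoring = revise_eval_metric("F1 Score")  # -> 'f1_weighted'
    search = GridSearchCV(
        LogisticRegression(max_iter=1000),
        param_grid={"C": [0.1, 1.0, 10.0]},
        scoring=scoring,
        cv=3,
    ).fit(X, y)
    print(scoring, round(search.best_score_, 6))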
@@ -483,7 +505,7 @@ def optuna_search(self, elif verbose == 4: optuna.logging.set_verbosity(optuna.logging.DEBUG) - study_direction = "maximize" if eval_metric in ['r2', 'accuracy', 'precision', 'recall', 'f1'] else "minimize" + study_direction = "maximize" if eval_metric in ['R2', 'Accuracy', 'Precision', 'Recall', 'F1 Score'] else "minimize" def objective(trial): """ diff --git a/flexml/helpers/__init__.py b/flexml/helpers/__init__.py new file mode 100644 index 0000000..98f5192 --- /dev/null +++ b/flexml/helpers/__init__.py @@ -0,0 +1 @@ +from flexml.helpers.validators import eval_metric_checker \ No newline at end of file diff --git a/flexml/helpers/validators.py b/flexml/helpers/validators.py new file mode 100644 index 0000000..5d51e53 --- /dev/null +++ b/flexml/helpers/validators.py @@ -0,0 +1,62 @@ +from typing import Optional, List +from flexml.config.supervised_config import EVALUATION_METRICS +from flexml.logger.logger import get_logger + +def eval_metric_checker(ml_task_type: str, + eval_metric: Optional[str] = None, + all_evaluation_metrics: Optional[List[str]] = None, + default_evaluation_metric: Optional[str] = None) -> str: + """ + Since eval_metric setting and validation is a common process for both Regression and Classification tasks... + this method is used to set and validate the evaluation metric. + + Parameters + ---------- + ml_task_type : str + The type of ML task ('Regression' or 'Classification') + + eval_metric : str, (default=None) + The evaluation metric to use for model evaluation + + If passed as None, the default evaluation metric of the corresponding ml_task_type will be used + + all_evaluation_metrics : List[str], (default=None) + All possible evaluation metrics for the current task (Regression or Classification), e.g. ['R2', 'MAE', 'MSE', 'RMSE'] for Regression + + If passed as None, they will be fetched from the config file + + default_evaluation_metric : str, (default=None) + The default evaluation metric to use for the current task (Regression or Classification) e.g. 
'R2' for Regression, 'Accuracy' for Classification + + If passed as None, it will be fetched from the config file + + Returns + ------- + str + The evaluation metric to use for model evaluation for the current task (Regression or Classification) + """ + logger = get_logger(__name__, "PROD", False) + + if default_evaluation_metric is None or all_evaluation_metrics is None: + default_evaluation_metric = EVALUATION_METRICS[ml_task_type]["DEFAULT"] + all_evaluation_metrics = EVALUATION_METRICS[ml_task_type]["ALL"] + + if eval_metric is None: + return default_evaluation_metric + + if not isinstance(eval_metric, str): + error_msg = f"eval_metric expected to be a string, got {type(eval_metric)}" + logger.error(error_msg) + raise TypeError(error_msg) + + if ml_task_type == "Regression": + eval_metric = eval_metric.upper() + else: + eval_metric = eval_metric.lower().capitalize() + + if eval_metric not in all_evaluation_metrics: + error_msg = f"{eval_metric} is not a valid evaluation metric for {ml_task_type}, expected one of the following: {all_evaluation_metrics}" + logger.error(error_msg) + raise ValueError(error_msg) + + return eval_metric \ No newline at end of file diff --git a/flexml/structures/supervised_base.py b/flexml/structures/supervised_base.py index 31b29af..5c88a8f 100644 --- a/flexml/structures/supervised_base.py +++ b/flexml/structures/supervised_base.py @@ -17,6 +17,7 @@ from flexml.config.supervised_config import ML_MODELS, EVALUATION_METRICS from flexml.logger.logger import get_logger +from flexml.helpers import eval_metric_checker from flexml._model_tuner import ModelTuner @@ -164,38 +165,6 @@ def __train_test_split(self, test_size: float, random_state: int) -> list[np.nda error_msg = f"An error occurred while splitting the data into train and test: {str(e)}" self.__logger.error(error_msg) raise ValueError(error_msg) - - def __eval_metric_checker(self, eval_metric: Optional[str] = None) -> str: - """ - Since eval_metric setting and validation is a common process for both Regression and Classification tasks... - this method is used to set and validate the evaluation metric. - - Parameters - ---------- - eval_metric : str - The evaluation metric to use for model evaluation. 
- - Returns - ------- - str - The evaluation metric to use for model evaluation for the current task (Regression or Classification) - """ - if eval_metric is None: # If the user passed nothing, the default evaluation metric will be used ('r2' for Regression, 'accuracy' for Classification) - return self.__DEFAULT_EVALUATION_METRIC - - if not isinstance(eval_metric, str): - error_msg = f"eval_metric expected to be a string, got {type(eval_metric)}" - self.__logger.error(error_msg) - raise ValueError(error_msg) - - if ((self.__ML_TASK_TYPE == "Classification" and eval_metric.lower().capitalize() not in self.__ALL_EVALUATION_METRICS) or - (self.__ML_TASK_TYPE == "Regression" and eval_metric.upper() not in self.__ALL_EVALUATION_METRICS)): - - error_msg = f"{eval_metric} is not a valid evaluation metric for {self.__ML_TASK_TYPE}, expected one of the following: {self.__ALL_EVALUATION_METRICS}" - self.__logger.error(error_msg) - raise ValueError(error_msg) - - return eval_metric def __top_n_models_checker(self, top_n_models: Optional[int]) -> int: """ @@ -233,9 +202,9 @@ def __evaluate_model_perf(self, y_test, y_pred): dict A dictionary containing the evaluation metric of the current task - * r2, mae, mse, rmse for Regression tasks + * R2, MAE, MSE, RMSE for Regression tasks - * accuracy, precision, recall, f1_score for Classification tasks + * Accuracy, Precision, Recall, F1 Score for Classification tasks """ if self.__ML_TASK_TYPE == "Regression": @@ -244,10 +213,10 @@ def __evaluate_model_perf(self, y_test, y_pred): mse = round(mean_squared_error(y_test, y_pred), 6) rmse = round(np.sqrt(mse), 6) return { - "r2": r2, - "mae": mae, - "mse": mse, - "rmse": rmse + "R2": r2, + "MAE": mae, + "MSE": mse, + "RMSE": rmse } elif self.__ML_TASK_TYPE == "Classification": @@ -256,10 +225,10 @@ def __evaluate_model_perf(self, y_test, y_pred): recall = round(recall_score(y_test, y_pred, average='weighted'), 6) f1 = round(f1_score(y_test, y_pred, average='weighted'), 6) return { - "accuracy": accuracy, - "precision": precision, - "recall": recall, - "f1_score": f1 + "Accuracy": accuracy, + "Precision": precision, + "Recall": recall, + "F1 Score": f1 } else: @@ -289,7 +258,7 @@ def start_experiment(self, test_size : float, (default=0.25) The size of the test data in the train-test split process. - eval_metric : str (default='r2' for Regression, 'accuracy' for Classification) + eval_metric : str (default='R2' for Regression, 'Accuracy' for Classification) The evaluation metric to use for model evaluation. random_state : int, (default=42) @@ -298,7 +267,7 @@ def start_experiment(self, For more info, visit https://scikit-learn.org/stable/glossary.html#term-random_state """ - self.eval_metric = self.__eval_metric_checker(eval_metric) + self.eval_metric = eval_metric_checker(self.__ML_TASK_TYPE, eval_metric) self.experiment_size = experiment_size self.test_size = test_size self.random_state = random_state @@ -374,12 +343,12 @@ def get_best_models(self, eval_metric: Optional[str] = None, top_n_models: int = ---------- top_n_models : int The number of top models to select based on the evaluation metric. 
- eval_metric : str (default='r2 for Regression, 'accuracy' for Classification) + eval_metric : str (default='R2 for Regression, 'Accuracy' for Classification) The evaluation metric to use for model evaluation: - * r2, mae, mse, rmse for Regression tasks + * R2, MAE, MSE, RMSE for Regression tasks - * accuracy, precision, recall, f1_score for Classification tasks + * Accuracy, Precision, Recall, F1 Score for Classification tasks Returns ------- object or list[object] @@ -393,7 +362,7 @@ def get_best_models(self, eval_metric: Optional[str] = None, top_n_models: int = top_n_models = self.__top_n_models_checker(top_n_models) if eval_metric is not None: - eval_metric = self.__eval_metric_checker(eval_metric) + eval_metric = eval_metric_checker(self.__ML_TASK_TYPE, eval_metric) else: # If the user doesn't pass a eval_metric, get the evaluation metric passed to the start_experiment function eval_metric = self.eval_metric @@ -425,8 +394,8 @@ def __sort_models(self, eval_metric: Optional[str] = None): Parameters ---------- - eval_metric : str (default='r2') - The evaluation metric to use for model evaluation (e.g. 'r2', 'mae', 'mse', 'rmse') + eval_metric : str (default='R2') + The evaluation metric to use for model evaluation (e.g. 'R2', 'MAE', 'MSE', 'RMSE') Returns ------- @@ -438,10 +407,10 @@ def __sort_models(self, eval_metric: Optional[str] = None): self.__logger.error(error_msg) raise ValueError(error_msg) - eval_metric = self.__eval_metric_checker(eval_metric) + eval_metric = eval_metric_checker(self.__ML_TASK_TYPE, eval_metric) # Since lower is better for mae, mse and rmse in Regression tasks, they should be sorted in ascending order - if self.__ML_TASK_TYPE == "Regression" and eval_metric in ['mae', 'mse', 'rmse']: + if self.__ML_TASK_TYPE == "Regression" and eval_metric in ['MAE', 'MSE', 'RMSE']: return self.__model_stats_df.sort_values(by=eval_metric, ascending=True).reset_index(drop = True) else: return self.__model_stats_df.sort_values(by=eval_metric, ascending=False).reset_index(drop = True) @@ -452,11 +421,11 @@ def show_model_stats(self, eval_metric: Optional[str] = None): Parameters ---------- - eval_metric : str (default='r2' for regression, 'accuracy' for classification) + eval_metric : str (default='R2' for regression, 'Accuracy' for classification) The evaluation metric to use for model evaluation - * r2, mae, mse, rmse for Regression tasks - * accuracy, precision, recall, f1_score for Classification tasks + * R2, MAE, MSE, RMSE for Regression tasks + * Accuracy, Precision, Recall, F1 Score for Classification tasks """ def highlight_best(s: pd.Series) -> list[str]: """ @@ -478,7 +447,7 @@ def highlight_best(s: pd.Series) -> list[str]: is_best = s == s.max() return ['background-color: green' if v else '' for v in is_best] - eval_metric = self.__eval_metric_checker(eval_metric) + eval_metric = eval_metric_checker(self.__ML_TASK_TYPE, eval_metric) sorted_model_stats_df = self.__sort_models(eval_metric) sorted_model_stats_df['Time Taken (sec)'] = sorted_model_stats_df['Time Taken (sec)'].apply(lambda x: round(x, 2)) sorted_model_stats_df.index += 1 @@ -567,12 +536,12 @@ def tune_model(self, * 'optuna' for Optuna (https://optuna.readthedocs.io/en/stable/) - eval_metric : str (default='r2' for regression, 'accuracy' for classification) + eval_metric : str (default='R2' for regression, 'Accuracy' for classification) The evaluation metric to use for model evaluation - * r2, mae, mse, rmse for Regression tasks + * R2, MAE, MSE, RMSE for Regression tasks - * accuracy, precision, 
recall, f1_score for Classification tasks + * Accuracy, Precision, Recall, F1 Score for Classification tasks param_grid : dict (default = defined custom param dict in flexml/config/tune_model_config.py) The parameter set to use for model tuning. @@ -646,7 +615,7 @@ def _show_tuning_report(tuning_report: dict): self.get_best_models() # Update the self.__model_stats_df self.show_model_stats() - eval_metric = self.__eval_metric_checker(eval_metric) + eval_metric = eval_metric_checker(self.__ML_TASK_TYPE, eval_metric) # Create the ModelTuner object If It's not created before, avoid creating it everytime tune_model() function is called if not hasattr(self, 'model_tuner'): From c7c23c89ced95c5c18eabe115152850b0ac8d904 Mon Sep 17 00:00:00 2001 From: Ozgur Aslan Date: Tue, 10 Dec 2024 21:15:00 +0300 Subject: [PATCH 3/3] Limited Scikit-learn to <=1.5.2 to avoid depcreated __sklearn_tags__ attribute error in XGBoost and LightGBM --- requirements-test.txt | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements-test.txt b/requirements-test.txt index ca1c420..1fbabdf 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -1,6 +1,6 @@ numpy>=1.21,<=1.26.4 pandas>=2.0.1 -scikit-learn>=1.5.0 +scikit-learn>=1.5.0,<=1.5.2 xgboost>=2.0.0 lightgbm>=4.0.0 catboost>=1.2.3 diff --git a/requirements.txt b/requirements.txt index 03ff2c3..d3f5591 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ numpy>=1.21,<=1.26.4 pandas>=2.0.1 -scikit-learn>=1.5.0 +scikit-learn>=1.5.0,<=1.5.2 xgboost>=2.0.0 lightgbm>=4.0.0 catboost>=1.2.3
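For reference, the eval_metric_checker helper introduced in PATCH 2/3 (flexml/helpers/validators.py) can be exercised on its own: regression names are upper-cased, classification names are capitalized, None falls back to the task's default from EVALUATION_METRICS, and unsupported names raise a ValueError. A usage sketch, assuming a flexml install with these patches applied:

    # Usage sketch for the validator added in flexml/helpers/validators.py
    # (assumes flexml with these patches is installed).
    from flexml.helpers import eval_metric_checker

    print(eval_metric_checker("Regression", "rmse"))          # 'RMSE'      (upper-cased)
    print(eval_metric_checker("Classification", "ACCURACY"))  # 'Accuracy'  (capitalized)
    print(eval_metric_checker("Regression", None))            # 'R2'        (default from config)

    try:
        eval_metric_checker("Classification", "roc_auc")      # not in the supported metric list
    except ValueError as exc:
        print(exc)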
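The scikit-learn ceiling added in PATCH 3/3 works around the __sklearn_tags__ attribute errors that newer scikit-learn releases trigger in the XGBoost and LightGBM scikit-learn wrappers, as the commit message notes. If an equivalent check were wanted at runtime, a hypothetical guard mirroring the requirements pin could look like the sketch below (not part of flexml; assumes the 'packaging' package is available):

    # Hypothetical runtime guard mirroring the pin scikit-learn>=1.5.0,<=1.5.2; not part of flexml.
    import sklearn
    from packaging.version import Version

    installed = Version(sklearn.__version__)
    if not (Version("1.5.0") <= installed <= Version("1.5.2")):
        raise RuntimeError(
            f"Expected scikit-learn >=1.5.0,<=1.5.2, found {sklearn.__version__}"
        )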