From 393de33dd826ca4f06a6f0bf7cc0fe1ac2452491 Mon Sep 17 00:00:00 2001 From: Susan Xueqing Liu Date: Sun, 9 Apr 2023 12:53:30 -0400 Subject: [PATCH] handling nlp divide by zero (#926) * handling nlp divide by zero * catching zerodivisionerror * catching zerodivisionerror * catching zerodivisionerror * addressing comments * addressing comments * updating test case * update * add blank to last line * update nlp notebook * rerun * rerun * sync with main * add model selection for nlg * addressing keyerror * add raise exception * update * fix bug * revert * updating automl_nlp * Update flaml/automl/model.py Co-authored-by: Zvi Baratz * address comments * address comments --------- Co-authored-by: Li Jiang Co-authored-by: Zvi Baratz --- flaml/automl/automl.py | 1 + flaml/automl/model.py | 23 ++++++++++++++--------- test/nlp/test_autohf.py | 4 +++- 3 files changed, 18 insertions(+), 10 deletions(-) diff --git a/flaml/automl/automl.py b/flaml/automl/automl.py index 2d8ea04a1a41..81ec4245845b 100644 --- a/flaml/automl/automl.py +++ b/flaml/automl/automl.py @@ -594,6 +594,7 @@ def predict( return None X = self._state.task.preprocess(X, self._transformer) y_pred = estimator.predict(X, **pred_kwargs) + if ( isinstance(y_pred, np.ndarray) and y_pred.ndim > 1 diff --git a/flaml/automl/model.py b/flaml/automl/model.py index 2144be8e4dc5..7208674c5317 100644 --- a/flaml/automl/model.py +++ b/flaml/automl/model.py @@ -1191,8 +1191,13 @@ def predict_proba(self, X, **pred_kwargs): test_dataset = Dataset.from_pandas(X_test) new_trainer = self._init_model_for_predict() - predictions = new_trainer.predict(test_dataset) - return predictions.predictions + try: + predictions = new_trainer.predict(test_dataset).predictions + except ZeroDivisionError: + logger.warning("Zero division error appeared in HuggingFace Transformers.") + predictions = np.array([-0.05] * len(test_dataset)) + else: + return predictions def score(self, X_val: DataFrame, y_val: Series, **kwargs): import transformers @@ -1222,13 +1227,13 @@ def predict(self, X, **pred_kwargs): new_trainer = self._init_model_for_predict() - if self._task not in NLG_TASKS: - predictions = new_trainer.predict(test_dataset) - else: - predictions = new_trainer.predict( - test_dataset, - metric_key_prefix="predict", - ) + kwargs = {} if self._task not in NLG_TASKS else {"metric_key_prefix": "predict"} + try: + predictions = new_trainer.predict(test_dataset, **kwargs) + except ZeroDivisionError: + logger.warning("Zero division error appeared in HuggingFace Transformers.") + predictions = np.array([0] * len(test_dataset)) + post_y_pred, _ = postprocess_prediction_and_true( task=self._task, y_pred=predictions.predictions, diff --git a/test/nlp/test_autohf.py b/test/nlp/test_autohf.py index 8edadc2005ec..d751200fdaee 100644 --- a/test/nlp/test_autohf.py +++ b/test/nlp/test_autohf.py @@ -62,7 +62,9 @@ def test_hf_data(): **automl_settings ) automl.predict(X_test, **{"per_device_eval_batch_size": 2}) - automl.predict(["test test", "test test"]) + automl.predict(["", ""]) + automl.predict_proba(["", ""]) + automl.predict( [ ["test test", "test test"],