From 07af337ab801eeb63568215df3e723acce50cce2 Mon Sep 17 00:00:00 2001 From: Walter Martin Date: Fri, 15 Oct 2021 15:41:16 -0400 Subject: [PATCH 1/5] mlflow improvements for metric performance Signed-off-by: Walter Martin --- src/transformers/integrations.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/transformers/integrations.py b/src/transformers/integrations.py index 4ab15b9d50f7..b499a14a7b91 100644 --- a/src/transformers/integrations.py +++ b/src/transformers/integrations.py @@ -20,6 +20,7 @@ import numbers import os import tempfile +import time import weakref from copy import deepcopy from pathlib import Path @@ -245,7 +246,10 @@ def _objective(trial, local_trainer, checkpoint_dir=None): def get_available_reporting_integrations(): integrations = [] if is_azureml_available(): - integrations.append("azure_ml") + if is_mlflow_available(): + integrations.append("mlflow") + else: + integrations.append("azure_ml") if is_comet_available(): integrations.append("comet_ml") if is_mlflow_available(): @@ -896,9 +900,10 @@ def on_log(self, args, state, control, logs, model=None, **kwargs): if not self._initialized: self.setup(args, state, model) if state.is_world_process_zero: + metrics = [] for k, v in logs.items(): if isinstance(v, (int, float)): - self._ml_flow.log_metric(k, v, step=state.global_step) + metrics.append(self._ml_flow.entities.Metric(k, v, int(time.time() * 1000), state.global_step)) else: logger.warning( f"Trainer is attempting to log a value of " @@ -906,6 +911,7 @@ def on_log(self, args, state, control, logs, model=None, **kwargs): f"MLflow's log_metric() only accepts float and " f"int types so we dropped this attribute." ) + self._ml_flow.log_batch(metrics=metrics) def on_train_end(self, args, state, control, **kwargs): if self._initialized and state.is_world_process_zero: From 295c278d8e1d891a31c8544464fe794636921df7 Mon Sep 17 00:00:00 2001 From: Walter Martin Date: Mon, 18 Oct 2021 11:13:52 -0400 Subject: [PATCH 2/5] remove integration changes Signed-off-by: Walter Martin --- src/transformers/integrations.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/transformers/integrations.py b/src/transformers/integrations.py index b499a14a7b91..2b54f0409477 100644 --- a/src/transformers/integrations.py +++ b/src/transformers/integrations.py @@ -246,10 +246,7 @@ def _objective(trial, local_trainer, checkpoint_dir=None): def get_available_reporting_integrations(): integrations = [] if is_azureml_available(): - if is_mlflow_available(): - integrations.append("mlflow") - else: - integrations.append("azure_ml") + integrations.append("azure_ml") if is_comet_available(): integrations.append("comet_ml") if is_mlflow_available(): From 31d10a7ced0964977a734969d41cfb01c35d65e6 Mon Sep 17 00:00:00 2001 From: Walter Martin Date: Wed, 20 Oct 2021 15:12:22 -0400 Subject: [PATCH 3/5] switch to fluent log_metrics instead of log_batch Signed-off-by: Walter Martin --- src/transformers/integrations.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/transformers/integrations.py b/src/transformers/integrations.py index 2b54f0409477..f99a9a72a2ae 100644 --- a/src/transformers/integrations.py +++ b/src/transformers/integrations.py @@ -897,10 +897,10 @@ def on_log(self, args, state, control, logs, model=None, **kwargs): if not self._initialized: self.setup(args, state, model) if state.is_world_process_zero: - metrics = [] + metrics = {} for k, v in logs.items(): if isinstance(v, (int, float)): - metrics.append(self._ml_flow.entities.Metric(k, v, int(time.time() * 1000), state.global_step)) + metrics[k] = v else: logger.warning( f"Trainer is attempting to log a value of " @@ -908,7 +908,7 @@ def on_log(self, args, state, control, logs, model=None, **kwargs): f"MLflow's log_metric() only accepts float and " f"int types so we dropped this attribute." ) - self._ml_flow.log_batch(metrics=metrics) + self._ml_flow.log_metrics(metrics=metrics, step=self.global_step) def on_train_end(self, args, state, control, **kwargs): if self._initialized and state.is_world_process_zero: From c9cb4e687c059159696c74c1f930029c83b8c3e8 Mon Sep 17 00:00:00 2001 From: Walter Martin Date: Wed, 20 Oct 2021 15:13:34 -0400 Subject: [PATCH 4/5] fix self>state Signed-off-by: Walter Martin --- src/transformers/integrations.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/transformers/integrations.py b/src/transformers/integrations.py index f99a9a72a2ae..ea214d5733b6 100644 --- a/src/transformers/integrations.py +++ b/src/transformers/integrations.py @@ -908,7 +908,7 @@ def on_log(self, args, state, control, logs, model=None, **kwargs): f"MLflow's log_metric() only accepts float and " f"int types so we dropped this attribute." ) - self._ml_flow.log_metrics(metrics=metrics, step=self.global_step) + self._ml_flow.log_metrics(metrics=metrics, step=state.global_step) def on_train_end(self, args, state, control, **kwargs): if self._initialized and state.is_world_process_zero: From 917fd1c7fca81bc94389a672f32426e97ae60e85 Mon Sep 17 00:00:00 2001 From: Walter Martin Date: Wed, 20 Oct 2021 15:14:19 -0400 Subject: [PATCH 5/5] remove time import Signed-off-by: Walter Martin --- src/transformers/integrations.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/transformers/integrations.py b/src/transformers/integrations.py index ea214d5733b6..cbb6b31fc30c 100644 --- a/src/transformers/integrations.py +++ b/src/transformers/integrations.py @@ -20,7 +20,6 @@ import numbers import os import tempfile -import time import weakref from copy import deepcopy from pathlib import Path