From 5f6ad8df5a08e78a121a72a21e21d95abb072e58 Mon Sep 17 00:00:00 2001 From: A Vertex SDK engineer Date: Fri, 15 Dec 2023 12:49:31 -0800 Subject: [PATCH] feat: Allow reuse of deleted experiment run id. PiperOrigin-RevId: 591334624 --- .../metadata/experiment_resources.py | 7 +- .../metadata/experiment_run_resource.py | 52 +++++++-- tests/system/aiplatform/test_experiments.py | 110 +++++++++++++++++- 3 files changed, 155 insertions(+), 14 deletions(-) diff --git a/google/cloud/aiplatform/metadata/experiment_resources.py b/google/cloud/aiplatform/metadata/experiment_resources.py index 43b0dadffc..a417e1bfb2 100644 --- a/google/cloud/aiplatform/metadata/experiment_resources.py +++ b/google/cloud/aiplatform/metadata/experiment_resources.py @@ -395,7 +395,12 @@ def delete(self, *, delete_backing_tensorboard_runs: bool = False): experiment_run.delete( delete_backing_tensorboard_run=delete_backing_tensorboard_runs ) - self._metadata_context.delete() + try: + self._metadata_context.delete() + except exceptions.NotFound: + _LOGGER.warning( + f"Experiment {self.name} metadata node not found. Skipping deletion." + ) def get_data_frame(self) -> "pd.DataFrame": # noqa: F821 """Get parameters, metrics, and time series metrics of all runs in this experiment as Dataframe. diff --git a/google/cloud/aiplatform/metadata/experiment_run_resource.py b/google/cloud/aiplatform/metadata/experiment_run_resource.py index 995754cdf3..5f5f8cb14a 100644 --- a/google/cloud/aiplatform/metadata/experiment_run_resource.py +++ b/google/cloud/aiplatform/metadata/experiment_run_resource.py @@ -757,12 +757,16 @@ def _create_context(): experiment_run._backing_tensorboard_run = None experiment_run._largest_step = None - if tensorboard: - cls._assign_backing_tensorboard( - self=experiment_run, tensorboard=tensorboard - ) - else: - cls._assign_to_experiment_backing_tensorboard(self=experiment_run) + try: + if tensorboard: + cls._assign_backing_tensorboard( + self=experiment_run, tensorboard=tensorboard + ) + else: + cls._assign_to_experiment_backing_tensorboard(self=experiment_run) + except Exception as e: + metadata_context.delete() + raise e experiment_run._associate_to_experiment(experiment) return experiment_run @@ -899,7 +903,12 @@ def assign_backing_tensorboard( backing_tensorboard = self._lookup_tensorboard_run_artifact() if backing_tensorboard: raise ValueError( - f"Experiment run {self._run_name} already associated to tensorboard resource {backing_tensorboard.resource.resource_name}" + f"Experiment run {self._run_name} already associated to tensorboard resource {backing_tensorboard.resource.resource_name}.\n" + f"To delete backing tensorboard run, execute the following:\n" + f'tensorboard_run_artifact = aiplatform.metadata.artifact.Artifact(artifact_name=f"{self._tensorboard_run_id(self._metadata_node.name)}")\n' + f'tensorboard_run_resource = aiplatform.TensorboardRun(tensorboard_run_artifact.metadata["resourceName"])\n' + f"tensorboard_run_resource.delete()\n" + f"tensorboard_run_artifact.delete()" ) self._assign_backing_tensorboard(tensorboard=tensorboard) @@ -1370,20 +1379,41 @@ def delete(self, *, delete_backing_tensorboard_run: bool = False): self._backing_tensorboard_run.resource.delete() self._backing_tensorboard_run.metadata.delete() else: - _LOGGER.warn( + _LOGGER.warning( f"Experiment run {self.name} does not have a backing tensorboard run." " Skipping deletion." ) else: - _LOGGER.warn( + _LOGGER.warning( f"Experiment run {self.name} does not have a backing tensorboard run." " Skipping deletion." ) + else: + _LOGGER.warning( + f"Experiment run {self.name} skipped backing tensorboard run deletion.\n" + f"To delete backing tensorboard run, execute the following:\n" + f'tensorboard_run_artifact = aiplatform.metadata.artifact.Artifact(artifact_name=f"{self._tensorboard_run_id(self._metadata_node.name)}")\n' + f'tensorboard_run_resource = aiplatform.TensorboardRun(tensorboard_run_artifact.metadata["resourceName"])\n' + f"tensorboard_run_resource.delete()\n" + f"tensorboard_run_artifact.delete()" + ) - self._metadata_node.delete() + try: + self._metadata_node.delete() + except exceptions.NotFound: + _LOGGER.warning( + f"Experiment run {self.name} metadata node not found." + " Skipping deletion." + ) if self._is_legacy_experiment_run(): - self._metadata_metric_artifact.delete() + try: + self._metadata_metric_artifact.delete() + except exceptions.NotFound: + _LOGGER.warning( + f"Experiment run {self.name} metadata node not found." + " Skipping deletion." + ) @_v1_not_supported def get_artifacts(self) -> List[artifact.Artifact]: diff --git a/tests/system/aiplatform/test_experiments.py b/tests/system/aiplatform/test_experiments.py index e9632fa078..3416578d65 100644 --- a/tests/system/aiplatform/test_experiments.py +++ b/tests/system/aiplatform/test_experiments.py @@ -444,7 +444,7 @@ def test_get_experiments_df(self): key=lambda d: d["run_name"], ) == sorted(df.fillna(0.0).to_dict("records"), key=lambda d: d["run_name"]) - def test_delete_run(self): + def test_delete_run_does_not_exist_raises_exception(self): run = aiplatform.ExperimentRun( run_name=_RUN, experiment=self._experiment_name, @@ -456,7 +456,113 @@ def test_delete_run(self): with pytest.raises(exceptions.NotFound): aiplatform.ExperimentRun(run_name=_RUN, experiment=self._experiment_name) - def test_delete_experiment(self): + def test_delete_run_success(self): + aiplatform.init( + project=e2e_base._PROJECT, + location=e2e_base._LOCATION, + experiment=self._experiment_name, + ) + aiplatform.start_run(_RUN) + run = aiplatform.ExperimentRun( + run_name=_RUN, + experiment=self._experiment_name, + project=e2e_base._PROJECT, + location=e2e_base._LOCATION, + ) + aiplatform.end_run() + + run.delete(delete_backing_tensorboard_run=True) + + with pytest.raises(exceptions.NotFound): + aiplatform.ExperimentRun( + run_name=_RUN, + experiment=self._experiment_name, + project=e2e_base._PROJECT, + location=e2e_base._LOCATION, + ) + + def test_reuse_run_success(self): + aiplatform.init( + project=e2e_base._PROJECT, + location=e2e_base._LOCATION, + experiment=self._experiment_name, + ) + aiplatform.start_run(_RUN) + run = aiplatform.ExperimentRun( + run_name=_RUN, + experiment=self._experiment_name, + project=e2e_base._PROJECT, + location=e2e_base._LOCATION, + ) + aiplatform.end_run() + run.delete(delete_backing_tensorboard_run=True) + + aiplatform.start_run(_RUN) + aiplatform.end_run() + + run = aiplatform.ExperimentRun( + run_name=_RUN, + experiment=self._experiment_name, + project=e2e_base._PROJECT, + location=e2e_base._LOCATION, + ) + assert run.name == _RUN + + def test_delete_run_then_tensorboard_success(self): + aiplatform.init( + project=e2e_base._PROJECT, + location=e2e_base._LOCATION, + experiment=self._experiment_name, + ) + aiplatform.start_run(_RUN, resume=True) + run = aiplatform.ExperimentRun( + run_name=_RUN, + experiment=self._experiment_name, + project=e2e_base._PROJECT, + location=e2e_base._LOCATION, + ) + aiplatform.end_run() + run.delete() + tensorboard_run_artifact = aiplatform.metadata.artifact.Artifact( + artifact_name=f"{self._experiment_name}-{_RUN}-tb-run" + ) + tensorboard_run_resource = aiplatform.TensorboardRun( + tensorboard_run_artifact.metadata["resourceName"] + ) + tensorboard_run_resource.delete() + tensorboard_run_artifact.delete() + + aiplatform.start_run(_RUN) + aiplatform.end_run() + + run = aiplatform.ExperimentRun( + run_name=_RUN, + experiment=self._experiment_name, + project=e2e_base._PROJECT, + location=e2e_base._LOCATION, + ) + assert run.name == _RUN + + def test_delete_wout_backing_tensorboard_reuse_run_raises_exception(self): + aiplatform.init( + project=e2e_base._PROJECT, + location=e2e_base._LOCATION, + experiment=self._experiment_name, + ) + aiplatform.start_run(_RUN, resume=True) + run = aiplatform.ExperimentRun( + run_name=_RUN, + experiment=self._experiment_name, + project=e2e_base._PROJECT, + location=e2e_base._LOCATION, + ) + aiplatform.end_run() + run.delete() + + with pytest.raises(ValueError): + aiplatform.start_run(_RUN) + + def test_delete_experiment_does_not_exist_raises_exception(self): experiment = aiplatform.Experiment( experiment_name=self._experiment_name, project=e2e_base._PROJECT,