Skip to content

Commit

Permalink
feat: Allow reuse of deleted experiment run id.
Browse files Browse the repository at this point in the history
PiperOrigin-RevId: 591334624
  • Loading branch information
vertex-sdk-bot authored and copybara-github committed Dec 15, 2023
1 parent e50e49e commit 5f6ad8d
Show file tree
Hide file tree
Showing 3 changed files with 155 additions and 14 deletions.
7 changes: 6 additions & 1 deletion google/cloud/aiplatform/metadata/experiment_resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -395,7 +395,12 @@ def delete(self, *, delete_backing_tensorboard_runs: bool = False):
experiment_run.delete(
delete_backing_tensorboard_run=delete_backing_tensorboard_runs
)
self._metadata_context.delete()
try:
self._metadata_context.delete()
except exceptions.NotFound:
_LOGGER.warning(
f"Experiment {self.name} metadata node not found. Skipping deletion."
)

def get_data_frame(self) -> "pd.DataFrame": # noqa: F821
"""Get parameters, metrics, and time series metrics of all runs in this experiment as Dataframe.
Expand Down
52 changes: 41 additions & 11 deletions google/cloud/aiplatform/metadata/experiment_run_resource.py
Original file line number Diff line number Diff line change
Expand Up @@ -757,12 +757,16 @@ def _create_context():
experiment_run._backing_tensorboard_run = None
experiment_run._largest_step = None

if tensorboard:
cls._assign_backing_tensorboard(
self=experiment_run, tensorboard=tensorboard
)
else:
cls._assign_to_experiment_backing_tensorboard(self=experiment_run)
try:
if tensorboard:
cls._assign_backing_tensorboard(
self=experiment_run, tensorboard=tensorboard
)
else:
cls._assign_to_experiment_backing_tensorboard(self=experiment_run)
except Exception as e:
metadata_context.delete()
raise e

experiment_run._associate_to_experiment(experiment)
return experiment_run
Expand Down Expand Up @@ -899,7 +903,12 @@ def assign_backing_tensorboard(
backing_tensorboard = self._lookup_tensorboard_run_artifact()
if backing_tensorboard:
raise ValueError(
f"Experiment run {self._run_name} already associated to tensorboard resource {backing_tensorboard.resource.resource_name}"
f"Experiment run {self._run_name} already associated to tensorboard resource {backing_tensorboard.resource.resource_name}.\n"
f"To delete backing tensorboard run, execute the following:\n"
f'tensorboard_run_artifact = aiplatform.metadata.artifact.Artifact(artifact_name=f"{self._tensorboard_run_id(self._metadata_node.name)}")\n'
f'tensorboard_run_resource = aiplatform.TensorboardRun(tensorboard_run_artifact.metadata["resourceName"])\n'
f"tensorboard_run_resource.delete()\n"
f"tensorboard_run_artifact.delete()"
)

self._assign_backing_tensorboard(tensorboard=tensorboard)
Expand Down Expand Up @@ -1370,20 +1379,41 @@ def delete(self, *, delete_backing_tensorboard_run: bool = False):
self._backing_tensorboard_run.resource.delete()
self._backing_tensorboard_run.metadata.delete()
else:
_LOGGER.warn(
_LOGGER.warning(
f"Experiment run {self.name} does not have a backing tensorboard run."
" Skipping deletion."
)
else:
_LOGGER.warn(
_LOGGER.warning(
f"Experiment run {self.name} does not have a backing tensorboard run."
" Skipping deletion."
)
else:
_LOGGER.warning(
f"Experiment run {self.name} skipped backing tensorboard run deletion.\n"
f"To delete backing tensorboard run, execute the following:\n"
f'tensorboard_run_artifact = aiplatform.metadata.artifact.Artifact(artifact_name=f"{self._tensorboard_run_id(self._metadata_node.name)}")\n'
f'tensorboard_run_resource = aiplatform.TensorboardRun(tensorboard_run_artifact.metadata["resourceName"])\n'
f"tensorboard_run_resource.delete()\n"
f"tensorboard_run_artifact.delete()"
)

self._metadata_node.delete()
try:
self._metadata_node.delete()
except exceptions.NotFound:
_LOGGER.warning(
f"Experiment run {self.name} metadata node not found."
" Skipping deletion."
)

if self._is_legacy_experiment_run():
self._metadata_metric_artifact.delete()
try:
self._metadata_metric_artifact.delete()
except exceptions.NotFound:
_LOGGER.warning(
f"Experiment run {self.name} metadata node not found."
" Skipping deletion."
)

@_v1_not_supported
def get_artifacts(self) -> List[artifact.Artifact]:
Expand Down
110 changes: 108 additions & 2 deletions tests/system/aiplatform/test_experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -444,7 +444,7 @@ def test_get_experiments_df(self):
key=lambda d: d["run_name"],
) == sorted(df.fillna(0.0).to_dict("records"), key=lambda d: d["run_name"])

def test_delete_run(self):
def test_delete_run_does_not_exist_raises_exception(self):
run = aiplatform.ExperimentRun(
run_name=_RUN,
experiment=self._experiment_name,
Expand All @@ -456,7 +456,113 @@ def test_delete_run(self):
with pytest.raises(exceptions.NotFound):
aiplatform.ExperimentRun(run_name=_RUN, experiment=self._experiment_name)

def test_delete_experiment(self):
def test_delete_run_success(self):
    """Delete a run together with its backing tensorboard run.

    After the deletion, looking the run up again by name must raise
    ``exceptions.NotFound``.
    """
    project = e2e_base._PROJECT
    location = e2e_base._LOCATION
    aiplatform.init(
        project=project, location=location, experiment=self._experiment_name
    )

    aiplatform.start_run(_RUN)
    # The same keyword set is used to fetch the run before and after deletion.
    lookup_kwargs = dict(
        run_name=_RUN,
        experiment=self._experiment_name,
        project=project,
        location=location,
    )
    created_run = aiplatform.ExperimentRun(**lookup_kwargs)
    aiplatform.end_run()

    created_run.delete(delete_backing_tensorboard_run=True)

    with pytest.raises(exceptions.NotFound):
        aiplatform.ExperimentRun(**lookup_kwargs)

def test_reuse_run_success(self):
    """Reuse a run id after the run and its tensorboard run are deleted.

    Creates ``_RUN``, deletes it with ``delete_backing_tensorboard_run=True``,
    starts a run with the same id again, and checks that the run fetched
    afterwards reports the name ``_RUN``.
    """
    project = e2e_base._PROJECT
    location = e2e_base._LOCATION
    aiplatform.init(
        project=project, location=location, experiment=self._experiment_name
    )

    # First lifetime of the run id: create, fetch, end, delete.
    aiplatform.start_run(_RUN)
    lookup_kwargs = dict(
        run_name=_RUN,
        experiment=self._experiment_name,
        project=project,
        location=location,
    )
    first_run = aiplatform.ExperimentRun(**lookup_kwargs)
    aiplatform.end_run()
    first_run.delete(delete_backing_tensorboard_run=True)

    # Second lifetime: the same id is started again.
    aiplatform.start_run(_RUN)
    aiplatform.end_run()

    reused_run = aiplatform.ExperimentRun(**lookup_kwargs)
    assert reused_run.name == _RUN

def test_delete_run_then_tensorboard_success(self):
    """Delete a run first, then clean up its tensorboard run by hand.

    The run is deleted without removing the backing tensorboard run. The
    tensorboard run artifact and resource are then deleted explicitly, after
    which the run id is started again and the fetched run must report the
    name ``_RUN``.
    """
    project = e2e_base._PROJECT
    location = e2e_base._LOCATION
    aiplatform.init(
        project=project, location=location, experiment=self._experiment_name
    )

    aiplatform.start_run(_RUN, resume=True)
    lookup_kwargs = dict(
        run_name=_RUN,
        experiment=self._experiment_name,
        project=project,
        location=location,
    )
    existing_run = aiplatform.ExperimentRun(**lookup_kwargs)
    aiplatform.end_run()
    existing_run.delete()

    # Manual cleanup: fetch the artifact, resolve the tensorboard run resource
    # from its "resourceName" metadata entry, then delete the resource before
    # the artifact.
    tb_artifact_name = f"{self._experiment_name}-{_RUN}-tb-run"
    tb_artifact = aiplatform.metadata.artifact.Artifact(
        artifact_name=tb_artifact_name
    )
    tb_resource = aiplatform.TensorboardRun(tb_artifact.metadata["resourceName"])
    tb_resource.delete()
    tb_artifact.delete()

    aiplatform.start_run(_RUN)
    aiplatform.end_run()

    reused_run = aiplatform.ExperimentRun(**lookup_kwargs)
    assert reused_run.name == _RUN

def test_delete_wout_backing_tensorboard_reuse_run_raises_exception(self):
    """Reusing a run id fails when the tensorboard run was left behind.

    The run is deleted with the default arguments (the backing tensorboard
    run is not removed); starting a run with the same id afterwards must
    raise ``ValueError``.
    """
    project = e2e_base._PROJECT
    location = e2e_base._LOCATION
    aiplatform.init(
        project=project, location=location, experiment=self._experiment_name
    )

    aiplatform.start_run(_RUN, resume=True)
    existing_run = aiplatform.ExperimentRun(
        run_name=_RUN,
        experiment=self._experiment_name,
        project=project,
        location=location,
    )
    aiplatform.end_run()
    existing_run.delete()

    with pytest.raises(ValueError):
        aiplatform.start_run(_RUN)

def test_delete_experiment_does_not_exist_raises_exception(self):
experiment = aiplatform.Experiment(
experiment_name=self._experiment_name,
project=e2e_base._PROJECT,
Expand Down

0 comments on commit 5f6ad8d

Please sign in to comment.