From dc6384dc01fd3cfe9c0694e609fa94d99dea2d96 Mon Sep 17 00:00:00 2001 From: Anna Shors <71393111+ashors1@users.noreply.github.com> Date: Wed, 31 Jul 2024 08:52:24 -0700 Subject: [PATCH] [NeMo-UX] Wrap task config save in a try/except (#9956) * wrap task config save in a try/except Signed-off-by: ashors1 * move fiddle import Signed-off-by: ashors1 --------- Signed-off-by: ashors1 --- nemo/lightning/nemo_logger.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/nemo/lightning/nemo_logger.py b/nemo/lightning/nemo_logger.py index 5ed783fdbefe..7c60599e29c3 100644 --- a/nemo/lightning/nemo_logger.py +++ b/nemo/lightning/nemo_logger.py @@ -7,7 +7,6 @@ import lightning_fabric as fl import pytorch_lightning as pl -from fiddle._src.experimental import serialization from pytorch_lightning.callbacks.model_checkpoint import ModelCheckpoint as PTLModelCheckpoint from pytorch_lightning.loggers import Logger, TensorBoardLogger, WandbLogger @@ -187,10 +186,15 @@ def _setup_trainer_model_checkpoint(self, trainer, log_dir, ckpt=None): ModelCheckpoint.CHECKPOINT_NAME_LAST = callback.filename + '-last' def _handle_task_config(self, task_config, log_dir): - task_config.save_config_img(log_dir / "task.png") - task_json = serialization.dump_json(task_config) - with open(log_dir / "task.json", "w") as f: - f.write(task_json) + try: + from fiddle._src.experimental import serialization + + task_config.save_config_img(log_dir / "task.png") + task_json = serialization.dump_json(task_config) + with open(log_dir / "task.json", "w") as f: + f.write(task_json) + except Exception as e: + logging.warning(f'Saving task config failed: {e}. Skipping saving') def _setup_file_logging(self, log_dir): """Set up file logging based on rank settings."""