From 1df73a17b0a6755b63263f38fb1a9e847132d9a0 Mon Sep 17 00:00:00 2001 From: Olatunji Ruwase Date: Mon, 3 Oct 2022 19:43:18 +0500 Subject: [PATCH] Checkpoint backwards-compatibility workaround --- deepspeed/runtime/bf16_optimizer.py | 5 ++++- deepspeed/utils/__init__.py | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/deepspeed/runtime/bf16_optimizer.py b/deepspeed/runtime/bf16_optimizer.py index 40b5b769bad1..216dbc35c9fd 100644 --- a/deepspeed/runtime/bf16_optimizer.py +++ b/deepspeed/runtime/bf16_optimizer.py @@ -4,6 +4,7 @@ from collections import OrderedDict import torch +import sys import os from deepspeed import comm as dist from deepspeed.runtime.constants import PIPE_REPLICATED @@ -21,7 +22,7 @@ is_model_parallel_parameter, see_memory_usage) -from deepspeed.utils import link_hp_params +from deepspeed.utils import link_hp_params, fragment_address from deepspeed.checkpoint import enable_universal_checkpoint from deepspeed.checkpoint.constants import (DS_VERSION, PARTITION_COUNT, @@ -31,6 +32,8 @@ GROUP_PADDINGS, PARAM_SLICE_MAPPINGS) +setattr(sys.modules[__name__], 'fragment_address', fragment_address) + class BF16_Optimizer(ZeROOptimizer): def __init__(self, diff --git a/deepspeed/utils/__init__.py b/deepspeed/utils/__init__.py index 6dd805b37844..6f71897e37a6 100644 --- a/deepspeed/utils/__init__.py +++ b/deepspeed/utils/__init__.py @@ -4,6 +4,6 @@ from .init_on_device import OnDevice from .groups import * from .nvtx import instrument_w_nvtx -from .tensor_fragment import tensor_fragment, get_full_hp_param, get_hp_fragment_mapping +from .tensor_fragment import tensor_fragment, get_full_hp_param, get_hp_fragment_mapping, fragment_address from .mixed_precision_linkage import link_hp_params from deepspeed.runtime.dataloader import RepeatingLoader