diff --git a/csrc/adam/cpu_adam.cpp b/csrc/adam/cpu_adam.cpp index d425dc3169ef..6726b895f12c 100644 --- a/csrc/adam/cpu_adam.cpp +++ b/csrc/adam/cpu_adam.cpp @@ -672,6 +672,13 @@ int ds_adam_step_plus_copy(int optimizer_id, return 0; } +int destroy_adam_optimizer(int optimizer_id) +{ + s_optimizers.erase(optimizer_id); + + return 0; +} + PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) { m.def("adam_update", &ds_adam_step, "DeepSpeed CPU Adam update (C++)"); @@ -679,4 +686,5 @@ PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) &ds_adam_step_plus_copy, "DeepSpeed CPU Adam update and param copy (C++)"); m.def("create_adam", &create_adam_optimizer, "DeepSpeed CPU Adam (C++)"); + m.def("destroy_adam", &destroy_adam_optimizer, "DeepSpeed CPU Adam destroy (C++)"); } diff --git a/deepspeed/ops/adam/cpu_adam.py b/deepspeed/ops/adam/cpu_adam.py index 7977d232b1fa..35eeedb86b5d 100755 --- a/deepspeed/ops/adam/cpu_adam.py +++ b/deepspeed/ops/adam/cpu_adam.py @@ -85,6 +85,11 @@ def __init__(self, weight_decay, adamw_mode) + def __del__(self): + # need to destroy the C++ object explicitly to avoid a memory leak when deepspeed.initialize + # is used multiple times in the same process (notebook or pytest worker) + self.ds_opt_adam.destroy_adam(self.opt_id) + def __setstate__(self, state): super(DeepSpeedCPUAdam, self).__setstate__(state) for group in self.param_groups: