From ab791591c8102c7c3755330b89e4203735979c7d Mon Sep 17 00:00:00 2001 From: Danielle Pintz Date: Sun, 21 May 2023 12:16:27 -0700 Subject: [PATCH] Fix module to device in AutoUnit (#398) Summary: Pull Request resolved: https://github.com/pytorch/tnt/pull/398 In D46001765, the `self.device` reference was accidentally changed to `device`. Because of this, the module isn't being moved to the device properly, and we are seeing errors like: ``` ValueError: DistributedDataParallel device_ids and output_device arguments only work with single-device/multiple-device GPU modules or CPU modules, but got device_ids [1], output_device None, and module parameters {device(type='cpu')} ``` when running vise DDP. Reviewed By: bobakfb Differential Revision: D46056924 fbshipit-source-id: d980909fa745161a800c72d91d849ee04b27aea7 --- torchtnt/framework/auto_unit.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/torchtnt/framework/auto_unit.py b/torchtnt/framework/auto_unit.py index 2b809db847..66e7c9e131 100644 --- a/torchtnt/framework/auto_unit.py +++ b/torchtnt/framework/auto_unit.py @@ -259,7 +259,7 @@ def __init__( # remove ddp comm hook variables from params dict del params_dict["comm_state"] del params_dict["comm_hook"] - module = module.to(device) + module = module.to(self.device) module = DDP(module, device_ids=device_ids, **params_dict) if torchdynamo_params: # TODO: Add support for dynamo and DDP @@ -295,7 +295,7 @@ def __init__( **asdict(strategy), ) else: - module = module.to(device) + module = module.to(self.device) self.module: torch.nn.Module = module