From b3fb5223837b21fdd66d2adf67cec85e910581a7 Mon Sep 17 00:00:00 2001 From: Danielle Pintz Date: Sun, 21 May 2023 10:14:14 -0700 Subject: [PATCH] Fix module to device in AutoUnit Summary: In D46001765, the `self.device` reference was accidentally changed to `device`. Because of this, the module isn't being moved to the device properly, and we are seeing errors like: ``` ValueError: DistributedDataParallel device_ids and output_device arguments only work with single-device/multiple-device GPU modules or CPU modules, but got device_ids [1], output_device None, and module parameters {device(type='cpu')} ``` when running vise DDP. Differential Revision: D46056924 fbshipit-source-id: ce22d1efb542b57b951430f0c75d6b075b0e956e --- torchtnt/framework/auto_unit.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/torchtnt/framework/auto_unit.py b/torchtnt/framework/auto_unit.py index 2b809db847..66e7c9e131 100644 --- a/torchtnt/framework/auto_unit.py +++ b/torchtnt/framework/auto_unit.py @@ -259,7 +259,7 @@ def __init__( # remove ddp comm hook variables from params dict del params_dict["comm_state"] del params_dict["comm_hook"] - module = module.to(device) + module = module.to(self.device) module = DDP(module, device_ids=device_ids, **params_dict) if torchdynamo_params: # TODO: Add support for dynamo and DDP @@ -295,7 +295,7 @@ def __init__( **asdict(strategy), ) else: - module = module.to(device) + module = module.to(self.device) self.module: torch.nn.Module = module