Skip to content

Commit 4e0993c

Browse files
Added hvd tests to checkpoint (#2192)
* added hvd tests to checkpoint
* added ddp subtests to hvd tests
* removed ddp sub-tests from hvd test

Co-authored-by: vfdev <[email protected]>
1 parent f4b1821 commit 4e0993c

File tree

1 file changed

+23
-0
lines changed

1 file changed

+23
-0
lines changed

Diff for: tests/ignite/handlers/test_checkpoint.py

+23
Original file line numberDiff line numberDiff line change
@@ -1243,6 +1243,29 @@ def test_distrib_nccl_gpu(distributed_context_single_node_nccl, get_rank_zero_di
12431243
_test_checkpoint_load_objects_ddp(device=device)
12441244

12451245

1246+
@pytest.mark.distributed
@pytest.mark.skipif(not idist.has_hvd_support, reason="Skip if no Horovod dist support")
@pytest.mark.skipif("WORLD_SIZE" in os.environ, reason="Skip if launched as multiproc")
def test_distrib_hvd(gloo_hvd_executor, get_rank_zero_dirname):
    """Run the state-dict save check twice through the ``gloo_hvd_executor`` fixture.

    The first run uses the detected device (CUDA when available, otherwise
    CPU) and writes under ``<dirname>/1``. The second run always passes
    ``"cpu"``, writes under ``<dirname>/2`` and supplies an extra positional
    ``True`` to the helper (its meaning is defined by
    ``_test_save_model_optimizer_lr_scheduler_with_state_dict``, which is not
    visible here -- TODO confirm).
    """
    has_cuda = torch.cuda.is_available()

    # One worker per visible GPU when CUDA is present; otherwise 4 processes.
    if has_cuda:
        device = torch.device("cuda")
        nproc = torch.cuda.device_count()
    else:
        device = torch.device("cpu")
        nproc = 4

    dirname = get_rank_zero_dirname()

    # Each entry is the positional-argument tuple handed to the helper;
    # separate sub-directories keep the two runs from sharing output.
    helper_args = [
        (device, os.path.join(dirname, "1")),
        ("cpu", os.path.join(dirname, "2"), True),
    ]
    for args in helper_args:
        gloo_hvd_executor(
            _test_save_model_optimizer_lr_scheduler_with_state_dict,
            args,
            np=nproc,
            do_init=True,
        )
1267+
1268+
12461269
def _test_tpu_saves_to_cpu(device, dirname):
12471270
torch.manual_seed(0)
12481271

0 commit comments

Comments
 (0)